From fdca87d37579f5e2fb460ec2439c8238722fcc61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sat, 7 Sep 2024 07:06:30 +0200 Subject: [PATCH 1/5] Only allocate BEAM registers for instructions that produce values --- lib/compiler/src/beam_ssa_pre_codegen.erl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl index a985b7c8c3b5..d101917d53d6 100644 --- a/lib/compiler/src/beam_ssa_pre_codegen.erl +++ b/lib/compiler/src/beam_ssa_pre_codegen.erl @@ -2582,16 +2582,17 @@ reserve_zregs(RPO, Blocks, Intervals, Res) -> reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}, #b_set{op={bif,'=:='},args=[Dst,Val],dst=Bool}], - Last, ShortLived, A) -> + Last, ShortLived, A0) -> case {Val,Last} of {#b_literal{val=Arity},#b_br{bool=Bool}} when Arity bsr 32 =:= 0 -> %% These two instructions can be combined to a test_arity %% instruction provided that the arity variable is short-lived. - reserve_test_zreg(Dst, ShortLived, A); + A1 = reserve_test_zreg(Dst, ShortLived, A0), + reserve_test_zreg(Bool, ShortLived, A1); {_,_} -> %% Either the arity is too big, or the boolean value is not %% used in a conditional branch. - A + A0 end; reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}], #b_switch{arg=Dst}, ShortLived, A) -> @@ -2616,6 +2617,7 @@ use_zreg(bs_set_position) -> yes; use_zreg(executable_line) -> yes; use_zreg(kill_try_tag) -> yes; use_zreg(landingpad) -> yes; +use_zreg(nif_start) -> yes; use_zreg(recv_marker_bind) -> yes; use_zreg(recv_marker_clear) -> yes; use_zreg(remove_message) -> yes; From 9def2260d1b0d6e1f4e4590dcca6ec64666c5af5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 9 Sep 2024 06:12:29 +0200 Subject: [PATCH 2/5] sys_coverage: Generate new variables with invalid names Temporary variables generated by compiler passes must have names that are not allowed in Erlang source code to ensure that they will not be included in debug information. --- lib/compiler/src/sys_coverage.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compiler/src/sys_coverage.erl b/lib/compiler/src/sys_coverage.erl index b5265ae8553f..653aa97495db 100644 --- a/lib/compiler/src/sys_coverage.erl +++ b/lib/compiler/src/sys_coverage.erl @@ -193,7 +193,7 @@ bool_switch(E, T, F, AllVars, AuxVarN) -> [{tuple,Anno,[{atom,Anno,badarg},AuxVar]}]}]}]}. aux_var(Vars, N) -> - Name = list_to_atom(lists:concat(['_', N])), + Name = list_to_atom(lists:concat(["cov", N])), case sets:is_element(Name, Vars) of true -> aux_var(Vars, N + 1); false -> Name From af2ebf13bf6dbf18d7e324ffdfa6310deed75ec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sat, 31 Aug 2024 07:32:59 +0200 Subject: [PATCH 3/5] beam_ssa: Add `split_blocks_after/4` Add `split_blocks_after/4` to split the block after the instruction for which the predicate returns `true`. Rename `split_blocks/4` to `split_blocks_before/4`. --- lib/compiler/src/beam_ssa.erl | 56 +++++++++++++++++------ lib/compiler/src/beam_ssa_opt.erl | 2 +- lib/compiler/src/beam_ssa_pre_codegen.erl | 3 +- 3 files changed, 44 insertions(+), 17 deletions(-) diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl index 9c84d963634b..66b2e67c2e39 100644 --- a/lib/compiler/src/beam_ssa.erl +++ b/lib/compiler/src/beam_ssa.erl @@ -42,7 +42,7 @@ predecessors/1, rename_vars/3, rpo/1,rpo/2, - split_blocks/4, + split_blocks_before/4,split_blocks_after/4, successors/1,successors/2, trim_unreachable/1, used/1,uses/2]). @@ -140,7 +140,7 @@ 'set_tuple_element' | 'succeeded' | 'update_record'. --import(lists, [foldl/3,mapfoldl/3,member/2,reverse/1,sort/1]). +-import(lists, [foldl/3,mapfoldl/3,member/2,reverse/1,reverse/2,sort/1]). -spec add_anno(Key, Value, Construct0) -> Construct when Key :: atom(), @@ -717,7 +717,7 @@ rename_vars(Rename, Labels, Blocks) when is_map(Rename), is_map(Blocks) -> %% block if the predicate returns true for the first instruction in a %% block. --spec split_blocks(Labels, Pred, Blocks0, Count0) -> {Blocks,Count} when +-spec split_blocks_before(Labels, Pred, Blocks0, Count0) -> {Blocks,Count} when Labels :: [label()], Pred :: fun((b_set()) -> boolean()), Blocks :: block_map(), @@ -726,8 +726,25 @@ rename_vars(Rename, Labels, Blocks) when is_map(Rename), is_map(Blocks) -> Blocks :: block_map(), Count :: label(). -split_blocks(Ls, P, Blocks, Count) when is_map(Blocks) -> - split_blocks_1(Ls, P, Blocks, Count). +split_blocks_before(Ls, P, Blocks, Count) when is_map(Blocks) -> + split_blocks_1(Ls, P, fun split_blocks_before_is/3, Blocks, Count). + +%% split_blocks_after(Labels, Predicate, Blocks0, Count0) -> {Blocks,Count}. +%% Call Predicate(Instruction) for each instruction in the given +%% blocks. If Predicate/1 returns true, split the block after this +%% instruction. + +-spec split_blocks_after(Labels, Pred, Blocks0, Count0) -> {Blocks,Count} when + Labels :: [label()], + Pred :: fun((b_set()) -> boolean()), + Blocks :: block_map(), + Count0 :: label(), + Blocks0 :: block_map(), + Blocks :: block_map(), + Count :: label(). + +split_blocks_after(Ls, P, Blocks, Count) when is_map(Blocks) -> + split_blocks_1(Ls, P, fun split_blocks_after_is/3, Blocks, Count). -spec trim_unreachable(SSA0) -> SSA when SSA0 :: block_map() | [{label(),b_blk()}], @@ -1080,9 +1097,9 @@ flatmapfoldl(F, Accu0, [Hd|Tail]) -> {R++Rs,Accu2}; flatmapfoldl(_, Accu, []) -> {[],Accu}. -split_blocks_1([L|Ls], P, Blocks0, Count0) -> +split_blocks_1([L|Ls], P, Split, Blocks0, Count0) -> #b_blk{is=Is0} = Blk = map_get(L, Blocks0), - case split_blocks_is(Is0, P, []) of + case Split(Is0, P, []) of {yes,Bef,Aft} -> NewLbl = Count0, Count = Count0 + 1, @@ -1092,23 +1109,32 @@ split_blocks_1([L|Ls], P, Blocks0, Count0) -> Blocks1 = Blocks0#{L:=BefBlk,NewLbl=>NewBlk}, Successors = successors(NewBlk), Blocks = update_phi_labels(Successors, L, NewLbl, Blocks1), - split_blocks_1([NewLbl|Ls], P, Blocks, Count); + split_blocks_1([NewLbl|Ls], P, Split, Blocks, Count); no -> - split_blocks_1(Ls, P, Blocks0, Count0) + split_blocks_1(Ls, P, Split, Blocks0, Count0) end; -split_blocks_1([], _, Blocks, Count) -> +split_blocks_1([], _, _, Blocks, Count) -> {Blocks,Count}. -split_blocks_is([I|Is], P, []) -> - split_blocks_is(Is, P, [I]); -split_blocks_is([I|Is], P, Acc) -> +split_blocks_before_is([I|Is], P, []) -> + split_blocks_before_is(Is, P, [I]); +split_blocks_before_is([I|Is], P, Acc) -> case P(I) of true -> {yes,reverse(Acc),[I|Is]}; false -> - split_blocks_is(Is, P, [I|Acc]) + split_blocks_before_is(Is, P, [I|Acc]) + end; +split_blocks_before_is([], _, _) -> no. + +split_blocks_after_is([I|Is], P, Acc) -> + case P(I) of + true -> + {yes,reverse(Acc, [I]),Is}; + false -> + split_blocks_after_is(Is, P, [I|Acc]) end; -split_blocks_is([], _, _) -> no. +split_blocks_after_is([], _, _) -> no. update_phi_labels_is([#b_set{op=phi,args=Args0}=I0|Is], Old, New) -> Args = [{Arg,rename_label(Lbl, Old, New)} || {Arg,Lbl} <:- Args0], diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl index 82bde660df3b..194ede8f3c40 100644 --- a/lib/compiler/src/beam_ssa_opt.erl +++ b/lib/compiler/src/beam_ssa_opt.erl @@ -555,7 +555,7 @@ ssa_opt_split_blocks({#opt_st{ssa=Blocks0,cnt=Count0}=St, FuncDb}) -> (_) -> false end, RPO = beam_ssa:rpo(Blocks0), - {Blocks,Count} = beam_ssa:split_blocks(RPO, P, Blocks0, Count0), + {Blocks,Count} = beam_ssa:split_blocks_before(RPO, P, Blocks0, Count0), {St#opt_st{ssa=Blocks,cnt=Count}, FuncDb}. %%% diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl index d101917d53d6..356d2ffdd706 100644 --- a/lib/compiler/src/beam_ssa_pre_codegen.erl +++ b/lib/compiler/src/beam_ssa_pre_codegen.erl @@ -1453,7 +1453,8 @@ split_rm_blocks([L|Ls], Blocks0, Count0, Acc) -> Op =:= remove_message end, Next = Count0, - {Blocks,Count} = beam_ssa:split_blocks([L], P, Blocks0, Count0), + {Blocks,Count} = beam_ssa:split_blocks_before([L], P, + Blocks0, Count0), true = Count0 =/= Count, %Assertion. split_rm_blocks(Ls, Blocks, Count, [Next|Acc]) end; From 974ad716db74cb69fdf16f0d8939596c496c4e2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 5 Aug 2024 13:49:13 +0200 Subject: [PATCH 4/5] compiler: Add support for debug information in BEAM files The `beam_debug_info` compiler option will insert `debug_line` instructions on lines containing executable code, and it will maintain information about which variables the BEAM registers contain at each `debug_line` instruction. This information will be inserted into a "DbgB" chunk in the BEAM file. When a `debug_line` is executed, the current stack frame (if any) is guaranteed to be fully initialized. The number of live X registers is given as the second operand for the `debug_line` instruction (it is guaranteed that there are no "holes"). Here is an example where the debug information translated to text has been inserted as comments before the lines they apply to: %% function entry (no stack frame); x0, x1, x2 are live sum(A, B, _Ignored) -> %% no stack frame; A in x0, B in x1, _Ignored in x2 C = A + B, %% no stack frame; B in x1, C in x0 io:format("~p\n", [C]), %% stack frame size is 1; C in y0 D = 10 * C, %% stack frame size is 1; C in y0, D in x0 {ok,D}. Note that not all variables are available in the debug information. For example, before the call to `io:format/2`, the sum of A and B have overwritten the register that used to hold the value of A, and the value for _Ignore was wiped out by the `+` operation. The size of the current stack frame is also given at each `debug_line` to be able to easily find the beginning of the previous stack frame. --- erts/emulator/beam/emu/ops.tab | 2 + erts/emulator/beam/jit/arm/ops.tab | 2 + erts/emulator/beam/jit/x86/ops.tab | 2 + lib/compiler/src/beam_asm.erl | 151 +++- lib/compiler/src/beam_block.erl | 7 +- lib/compiler/src/beam_core_to_ssa.erl | 112 ++- lib/compiler/src/beam_dict.erl | 35 +- lib/compiler/src/beam_disasm.erl | 7 + lib/compiler/src/beam_flatten.erl | 1 + lib/compiler/src/beam_ssa_alias.erl | 2 + lib/compiler/src/beam_ssa_codegen.erl | 314 +++++++- lib/compiler/src/beam_ssa_pre_codegen.erl | 125 +++- lib/compiler/src/beam_trim.erl | 14 + lib/compiler/src/beam_validator.erl | 43 +- lib/compiler/src/beam_z.erl | 2 + lib/compiler/src/compile.erl | 55 +- lib/compiler/src/core_pp.erl | 14 +- lib/compiler/src/genop.tab | 6 + lib/compiler/src/sys_coverage.erl | 124 ++- lib/compiler/src/v3_core.erl | 38 +- lib/compiler/test/Makefile | 4 + lib/compiler/test/beam_debug_info_SUITE.erl | 706 ++++++++++++++++++ lib/compiler/test/compile_SUITE.erl | 24 +- .../test/compile_SUITE_data/small.erl | 4 +- lib/compiler/test/test_lib.erl | 56 +- lib/dialyzer/src/dialyzer_dataflow.erl | 2 + lib/dialyzer/src/dialyzer_typesig.erl | 2 + lib/stdlib/src/erl_expand_records.erl | 2 + lib/stdlib/src/erl_pp.erl | 2 + 29 files changed, 1716 insertions(+), 142 deletions(-) create mode 100644 lib/compiler/test/beam_debug_info_SUITE.erl diff --git a/erts/emulator/beam/emu/ops.tab b/erts/emulator/beam/emu/ops.tab index 1bc7b93e499c..3a4ddd35ce3b 100644 --- a/erts/emulator/beam/emu/ops.tab +++ b/erts/emulator/beam/emu/ops.tab @@ -97,6 +97,8 @@ line I executable_line _Id _Line => _ +debug_line u u u => _ + # For the JIT, the init_yregs/1 instruction allows generation of better code. # For the BEAM interpreter, though, it will probably be more efficient to # translate all uses of init_yregs/1 back to the instructions that the compiler diff --git a/erts/emulator/beam/jit/arm/ops.tab b/erts/emulator/beam/jit/arm/ops.tab index 1fe98bb6473e..b17189647b69 100644 --- a/erts/emulator/beam/jit/arm/ops.tab +++ b/erts/emulator/beam/jit/arm/ops.tab @@ -91,6 +91,8 @@ line I executable_line I I +debug_line u u u => _ + allocate t t allocate_heap t I t diff --git a/erts/emulator/beam/jit/x86/ops.tab b/erts/emulator/beam/jit/x86/ops.tab index 4be0fac96000..3783a3c52ce7 100644 --- a/erts/emulator/beam/jit/x86/ops.tab +++ b/erts/emulator/beam/jit/x86/ops.tab @@ -91,6 +91,8 @@ line I executable_line I I +debug_line u u u => _ + allocate t t allocate_heap t I t diff --git a/lib/compiler/src/beam_asm.erl b/lib/compiler/src/beam_asm.erl index 9f8c111c9743..6d7ac3906a39 100644 --- a/lib/compiler/src/beam_asm.erl +++ b/lib/compiler/src/beam_asm.erl @@ -27,11 +27,14 @@ -export_type([fail/0,label/0,src/0,module_code/0,function_name/0]). --import(lists, [append/1,duplicate/2,map/2,member/2,keymember/3,splitwith/2]). +-import(lists, [append/1,duplicate/2,keymember/3,last/1,map/2, + member/2,splitwith/2]). -include("beam_opcodes.hrl"). -include("beam_asm.hrl"). +-define(BEAM_DEBUG_INFO_VERSION, 0). + %% Common types for describing operands for BEAM instructions. -type src() :: beam_reg() | {'literal',term()} | @@ -60,23 +63,24 @@ -define(BEAMFILE_EXECUTABLE_LINE, 1). -define(BEAMFILE_FORCE_LINE_COUNTERS, 2). --spec module(module_code(), [{binary(), binary()}], [{atom(),term()}], [compile:option()]) -> - {'ok',binary()}. - -module(Code, ExtraChunks, CompileInfo, CompilerOpts) -> - {ok,assemble(Code, ExtraChunks, CompileInfo, CompilerOpts)}. +-spec module(module_code(), [{binary(), binary()}], + [{atom(),term()}], [compile:option()]) -> + {'ok',binary()}. -assemble({Mod,Exp0,Attr0,Asm0,NumLabels}, ExtraChunks, CompileInfo, CompilerOpts) -> +module(Code0, ExtraChunks, CompileInfo, CompilerOpts) -> + {Mod,Exp0,Attr0,Asm0,NumLabels} = Code0, {1,Dict0} = beam_dict:atom(Mod, beam_dict:new()), {0,Dict1} = beam_dict:fname(atom_to_list(Mod) ++ ".erl", Dict0), {0,Dict2} = beam_dict:type(any, Dict1), Dict3 = reject_unsupported_versions(Dict2), + NumFuncs = length(Asm0), {Asm,Attr} = on_load(Asm0, Attr0), Exp = sets:from_list(Exp0), - {Code,Dict} = assemble_1(Asm, Exp, Dict3, []), - build_file(Code, Attr, Dict, NumLabels, NumFuncs, - ExtraChunks, CompileInfo, CompilerOpts). + {Code,Dict} = assemble(Asm, Exp, Dict3, []), + Beam = build_file(Code, Attr, Dict, NumLabels, NumFuncs, + ExtraChunks, CompileInfo, CompilerOpts), + {ok,Beam}. reject_unsupported_versions(Dict) -> %% Emit an instruction that was added in our lowest supported @@ -106,7 +110,7 @@ insert_on_load_instruction(Is0, Entry) -> end, Is0), Bef ++ [El,on_load|Is]. -assemble_1([{function,Name,Arity,Entry,Asm}|T], Exp, Dict0, Acc) -> +assemble([{function,Name,Arity,Entry,Asm}|T], Exp, Dict0, Acc) -> Dict1 = case sets:is_element({Name,Arity}, Exp) of true -> beam_dict:export(Name, Arity, Entry, Dict0); @@ -114,8 +118,8 @@ assemble_1([{function,Name,Arity,Entry,Asm}|T], Exp, Dict0, Acc) -> beam_dict:local(Name, Arity, Entry, Dict0) end, {Code, Dict2} = assemble_function(Asm, Acc, Dict1), - assemble_1(T, Exp, Dict2, Code); -assemble_1([], _Exp, Dict0, Acc) -> + assemble(T, Exp, Dict2, Code); +assemble([], _Exp, Dict0, Acc) -> {IntCodeEnd,Dict1} = make_op(int_code_end, Dict0), {list_to_binary(lists:reverse(Acc, [IntCodeEnd])),Dict1}. @@ -125,17 +129,23 @@ assemble_function([H|T], Acc, Dict0) -> assemble_function([], Code, Dict) -> {Code, Dict}. -build_file(Code, Attr, Dict, NumLabels, NumFuncs, ExtraChunks0, CompileInfo, CompilerOpts) -> +build_file(Code, Attr, Dict0, NumLabels, NumFuncs, ExtraChunks0, + CompileInfo, CompilerOpts) -> %% Create the code chunk. CodeChunk = chunk(<<"Code">>, <<16:32, (beam_opcodes:format_number()):32, - (beam_dict:highest_opcode(Dict)):32, + (beam_dict:highest_opcode(Dict0)):32, NumLabels:32, NumFuncs:32>>, Code), + %% Build the BEAM debug information chunk. It is important + %% to build it early, because it will add entries to the + %% atom and literal tables. + {ExtraChunks1,Dict} = build_beam_debug_info(ExtraChunks0, CompilerOpts, Dict0), + %% Create the atom table chunk. AtomChunk = build_atom_table(CompilerOpts, Dict), @@ -186,13 +196,14 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, ExtraChunks0, CompileInfo, Com TypeTab), %% Create the meta chunk - Meta = proplists:get_value(<<"Meta">>, ExtraChunks0, empty), + Meta = proplists:get_value(<<"Meta">>, ExtraChunks1, empty), MetaChunk = case Meta of empty -> []; Meta -> chunk(<<"Meta">>, Meta) end, + %% Remove Meta chunk from ExtraChunks since it is essential - ExtraChunks = ExtraChunks0 -- [{<<"Meta">>, Meta}], + ExtraChunks = ExtraChunks1 -- [{<<"Meta">>, Meta}], %% Create the attributes and compile info chunks. @@ -381,6 +392,108 @@ filter_essentials([<<>>|T]) -> filter_essentials(T); filter_essentials([]) -> []. +%%% +%%% Build the BEAM debug information chunk. +%%% + +build_beam_debug_info(ExtraChunks, CompilerOpts, Dict) -> + case member(beam_debug_info, CompilerOpts) of + true -> + build_beam_debug_info_1(ExtraChunks, Dict); + false -> + {ExtraChunks,Dict} + end. + +build_beam_debug_info_1(ExtraChunks0, Dict0) -> + DebugTab0 = beam_dict:debug_table(Dict0), + DebugTab1 = [{Index,Info} || + Index := Info <- maps:iterator(DebugTab0, ordered)], + DebugTab = build_bdi_fill_holes(DebugTab1), + NumVars = lists:sum([length(Vs) || {_,Vs} <- DebugTab]), + {Contents0,Dict} = build_bdi(DebugTab, Dict0), + NumItems = length(Contents0), + Contents1 = iolist_to_binary(Contents0), + + 0 = NumItems bsr 31, %Assertion. + 0 = NumVars bsr 31, %Assertion. + + Contents = <>, + ExtraChunks = [{~"DbgB",Contents}|ExtraChunks0], + {ExtraChunks,Dict}. + +build_bdi_fill_holes([]) -> + []; +build_bdi_fill_holes([{_,Item}]) -> + [Item]; +build_bdi_fill_holes([{I0,Item}|[{I1,_}|_]=T]) -> + case I0 + 1 of + I1 -> + [Item|build_bdi_fill_holes(T)]; + Next -> + NewPair = {Next,{none,[]}}, + [Item|build_bdi_fill_holes([NewPair|T])] + end. + +build_bdi([{FrameSize0,Vars0}|Items], Dict0) -> + %% The debug information utilizes the encoding machinery for BEAM + %% instructions. The debug information for `debug_line` + %% instructions is translated to: + %% + %% {call,FrameSize,{list,[VariableName,Where,...]}} + %% + %% Where: + %% + %% FrameSize := 'none' | 0..1023 + %% VariableName := binary() + %% Where := {x,0..1023} | {y,0..1023} | {literal,_} | + %% {integer,_} | {atom,_} | {float,_} | nil + %% + %% The only reason the `call` instruction is used is because it + %% has two operands. + %% + %% The debug information in the following example: + %% + %% {debug_line,[...],1,1, + %% {4, [{'Args',[{y,3}]}, + %% {'Line',[{y,2}]}, + %% {'Live',[{x,0},{y,1}]}]}} + %% + %% will be translated to the following instruction: + %% + %% {call,4,{list,[{literal,<<"Args">>},{y,3}, + %% {literal,<<"Line">>},{y,2}, + %% {literal,<<"Live">>},{y,1}]}} + %% + %% Note that only one register is given for each variable. It + %% is always the last register listed. + + FrameSize = case FrameSize0 of + none -> nil; + entry -> {atom,entry}; + _ -> FrameSize0 + end, + Vars1 = case FrameSize0 of + entry -> + [[Name,Reg] || {Name,[Reg]} <:- Vars0]; + _ -> + [[{literal,atom_to_binary(Name)},last(Regs)] || + {Name,[_|_]=Regs} <:- Vars0] + end, + Vars = append(Vars1), + Instr0 = {call,FrameSize,{list,Vars}}, + {Instr,Dict1} = make_op(Instr0, Dict0), + {Tail,Dict2} = build_bdi(Items, Dict1), + {[Instr|Tail],Dict2}; +build_bdi([], Dict) -> + {[],Dict}. + +%%% +%%% Functions for assembling BEAM instruction. +%%% + bif_type(fnegate, 1) -> {op,fnegate}; bif_type(fadd, 2) -> {op,fadd}; bif_type(fsub, 2) -> {op,fsub}; @@ -397,6 +510,10 @@ make_op({line=Op,Location}, Dict0) -> make_op({executable_line=Op,Location,Index}, Dict0) -> {LocationIndex,Dict} = beam_dict:line(Location, Dict0, Op), encode_op(executable_line, [LocationIndex,Index], Dict); +make_op({debug_line=Op,Location,Index,Live,DebugInfo}, Dict0) -> + {LocationIndex,Dict1} = beam_dict:line(Location, Dict0, Op), + Dict = beam_dict:debug_info(Index, DebugInfo, Dict1), + encode_op(debug_line, [LocationIndex,Index,Live], Dict); make_op({bif, Bif, {f,_}, [], Dest}, Dict) -> %% BIFs without arguments cannot fail. encode_op(bif0, [{extfunc, erlang, Bif, 0}, Dest], Dict); diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 4ddc88874a32..aa44a2f40f14 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -25,7 +25,7 @@ -include("beam_asm.hrl"). -export([module/2]). --import(lists, [keysort/2,member/2,reverse/1,reverse/2, +-import(lists, [flatmap/2,keysort/2,member/2,reverse/1,reverse/2, splitwith/2,usort/1]). -spec module(beam_utils:module_code(), [compile:option()]) -> @@ -172,12 +172,17 @@ collect({put_map,{f,0},Op,S,D,R,{list,Puts}}) -> collect({fmove,S,D}) -> {set,[D],[S],fmove}; collect({fconv,S,D}) -> {set,[D],[S],fconv}; collect({executable_line,_,_}=Line) -> {set,[],[],Line}; +collect({debug_line,_,_,_,_}=Line) -> collect_debug_line(Line); collect({swap,D1,D2}) -> Regs = [D1,D2], {set,Regs,Regs,swap}; collect({make_fun3,F,I,U,D,{list,Ss}}) -> {set,[D],Ss,{make_fun3,F,I,U}}; collect(_) -> error. +collect_debug_line({debug_line,_Loc,_Index,_Live,{_,Vars}}=I) -> + Ss = flatmap(fun({_Name,Regs}) -> Regs end, Vars), + {set,[],Ss,I}. + %% embed_lines([Instruction]) -> [Instruction] %% Combine blocks that would be split by line/1 instructions. %% Also move a line instruction before a block into the block, diff --git a/lib/compiler/src/beam_core_to_ssa.erl b/lib/compiler/src/beam_core_to_ssa.erl index 5b613a3a6cb0..82804c0795a5 100644 --- a/lib/compiler/src/beam_core_to_ssa.erl +++ b/lib/compiler/src/beam_core_to_ssa.erl @@ -93,7 +93,7 @@ %% matching. (Construction of those term types is translated directly %% to SSA instructions.) --record(cg_tuple, {es}). +-record(cg_tuple, {es,keep=ordsets:new()}). -record(cg_map, {var=#b_literal{val=#{}},op,es}). -record(cg_map_pair, {key,val}). -record(cg_cons, {hd,tl}). @@ -148,7 +148,8 @@ get_anno(#cg_select{anno=Anno}) -> Anno. funs=[], %Fun functions free=#{}, %Free variables ws=[] :: [warning()], %Warnings. - no_min_max_bifs=false :: boolean() + no_min_max_bifs=false :: boolean(), + beam_debug_info=false :: boolean() }). -spec module(cerl:c_module(), [compile:option()]) -> @@ -158,8 +159,10 @@ module(#c_module{name=#c_literal{val=Mod},exports=Es,attrs=As,defs=Fs}, Options) Kas = attributes(As), Kes = map(fun (#c_var{name={_,_}=Fname}) -> Fname end, Es), NoMinMaxBifs = proplists:get_bool(no_min_max_bifs, Options), + DebugInfo = proplists:get_bool(beam_debug_info, Options), St0 = #kern{module=Mod, - no_min_max_bifs=NoMinMaxBifs}, + no_min_max_bifs=NoMinMaxBifs, + beam_debug_info=DebugInfo}, {Kfs,St} = mapfoldl(fun function/2, St0, Fs), Body = Kfs ++ St#kern.funs, Code = #b_module{name=Mod,exports=Kes,attributes=Kas,body=Body}, @@ -198,13 +201,18 @@ include_attribute(file) -> false; include_attribute(compile) -> false; include_attribute(_) -> true. -function({#c_var{name={F,Arity}=FA},Body}, St0) -> +function({#c_var{anno=Anno,name={F,Arity}=FA},Body0}, St0) -> try %% Find a suitable starting value for the counter %% used for generating labels and variable names. - Count0 = cerl_trees:next_free_variable_name(Body), + Count0 = cerl_trees:next_free_variable_name(Body0), Count = max(?EXCEPTION_BLOCK + 1, Count0), + %% If this module is being compiled with `beam_debug_info`, + %% insert a special `debug_line` instruction as the + %% first instruction in this function. + Body = handle_debug_line(Anno, Body0), + %% First pass: Basic translation. St1 = St0#kern{func=FA,vcount=Count,fcount=0}, {#ifun{anno=Ab,vars=Kvs,body=B0},[],St2} = expr(Body, new_sub(), St1), @@ -216,6 +224,7 @@ function({#c_var{name={F,Arity}=FA},Body}, St0) -> %% Third pass: Translation to SSA code. FDef = make_ssa_function(Ab, F, Kvs, B1, St5), + {FDef,St5} catch Class:Error:Stack -> @@ -223,6 +232,14 @@ function({#c_var{name={F,Arity}=FA},Body}, St0) -> erlang:raise(Class, Error, Stack) end. +handle_debug_line([{debug_line,{Location,Index}}], #c_fun{body=Body}=Fun) -> + DbgLine = #c_primop{anno=Location, + name=#c_literal{val=debug_line}, + args=[#c_literal{val=Index}]}, + Seq = #c_seq{arg=DbgLine,body=Body}, + Fun#c_fun{body=Seq}; +handle_debug_line(_, Fun) -> Fun. + %%% %%% First pass: Basic translation. %%% @@ -368,6 +385,15 @@ expr(#c_call{anno=A,module=M0,name=F0,args=Cargs}, Sub, St0) -> args=[M0,F0,cerl:make_list(Cargs)]}, expr(Call, Sub, St) end; +expr(#c_primop{anno=A0,name=#c_literal{val=debug_line}, + args=Cargs}, Sub, St0) -> + {Args,Ap,St1} = atomic_list(Cargs, Sub, St0), + #b_set{anno=A1} = I0 = primop(debug_line, A0, Args), + {_,Alias} = Sub, + A = A1#{alias => Alias}, + I = I0#b_set{anno=A}, + St2 = St1#kern{beam_debug_info=true}, + {I,Ap,St2}; expr(#c_primop{anno=A,name=#c_literal{val=match_fail},args=[Arg]}, Sub, St) -> translate_match_fail(Arg, Sub, A, St); expr(#c_primop{anno=A,name=#c_literal{val=Op},args=Cargs}, Sub, St0) -> @@ -1665,9 +1691,24 @@ get_match(#cg_bin_seg{}=Seg, St0) -> get_match(#cg_bin_int{}=BinInt, St0) -> {N,St1} = new_var(St0), {BinInt#cg_bin_int{next=N},[N],St1}; -get_match(#cg_tuple{es=Es}, St0) -> +get_match(#cg_tuple{es=Es}, #kern{beam_debug_info=DebugInfo}=St0) -> {Mes,St1} = new_vars(length(Es), St0), - {#cg_tuple{es=Mes},Mes,St1}; + Keep = + case DebugInfo of + true -> + %% Force extraction of all variables mentioned in the + %% original source to give them a chance to appear in + %% the debug information. This is a not guarantee that + %% they will appear, since they can be killed before + %% reaching a `debug_line` instruction. + Keep0 = [New || + #b_var{name=Old} <- Es && #b_var{name=New} <- Mes, + beam_ssa_codegen:is_original_variable(Old)], + ordsets:from_list(Keep0); + false -> + [] + end, + {#cg_tuple{es=Mes,keep=Keep},Mes,St1}; get_match(#cg_map{op=exact,es=Es0}, St0) -> {Mes,St1} = new_vars(length(Es0), St0), {Es,_} = mapfoldl(fun(#cg_map_pair{}=Pair, [V|Vs]) -> @@ -2251,12 +2292,13 @@ umatch_list(Ms0, Br, St) -> {[M1|Ms1],union(Mu, Us),Stb} end, {[],[],St}, Ms0). -pat_mark_unused(#cg_tuple{es=Es0}=P, Used0, Ps) -> +pat_mark_unused(#cg_tuple{es=Es0,keep=Keep}=P, Used0, Ps) -> %% Not extracting unused tuple elements is an optimization for %% compile time and memory use during compilation. It is probably %% worthwhile because it is common to extract only a few elements %% from a huge record. - Used = intersection(Used0, Ps), + Used1 = ordsets:union(Used0, Keep), + Used = intersection(Used1, Ps), Es = [case member(V, Used) of true -> Var; false -> #b_literal{val=unused} @@ -2373,6 +2415,29 @@ cg(#b_set{op=copy,dst=#b_var{name=Dst},args=[Arg0]}, St0) -> Arg = ssa_arg(Arg0, St0), St = set_ssa_var(Dst, Arg, St0), {[],St}; +cg(#b_set{anno=Anno0,op=debug_line,args=Args0}=Set0, St) -> + Args = ssa_args(Args0, St), + Literals = [{Val,From} || From := #b_literal{val=Val} <- St#cg.vars], + Anno1 = Anno0#{literals => Literals}, + NewAlias = [{To,From} || From := #b_var{name=To} <- St#cg.vars], + case NewAlias of + [_|_] -> + Alias0 = maps:get(alias, Anno0, #{}), + Alias1 = foldl(fun({To,From}, A) -> + case A of + #{To := Vars0} -> + Vars1 = ordsets:add_element(From, Vars0), + A#{To := Vars1}; + #{} -> + A#{To => [From]} + end + end, Alias0, NewAlias), + Anno = Anno1#{alias => Alias1}, + Set = Set0#b_set{anno=Anno,args=Args}, + {[Set],St}; + [] -> + {[Set0#b_set{anno=Anno1,args=Args}],St} + end; cg(#b_set{args=Args0}=Set0, St) -> Args = ssa_args(Args0, St), Set = Set0#b_set{args=Args}, @@ -2434,8 +2499,10 @@ cg(#cg_opaque{val=Check}, St) -> match_cg(#cg_alt{first=F,then=S}, Fail, St0) -> {Tf,St1} = new_label(St0), {Fis,St2} = match_cg(F, Tf, St1), - {Sis,St3} = match_cg(S, Fail, St2), - {Fis ++ [{label,Tf}] ++ Sis,St3}; + St3 = restore_vars(St1, St2), + {Sis,St4} = match_cg(S, Fail, St3), + St5 = restore_vars(St3, St4), + {Fis ++ [{label,Tf}] ++ Sis,St5}; match_cg(#cg_select{var=#b_var{}=Src0,types=Scs}, Fail, St) -> Src = ssa_arg(Src0, St), match_fmf(fun (#cg_type_clause{type=Type,values=Vs}, F, Sta) -> @@ -2476,7 +2543,8 @@ select_cg(Type, Scs, Var, Tf, Vf, St0) -> {Vis,St1} = mapfoldl(fun (S, Sta) -> {Val,Is,Stb} = select_val(S, Var, Vf, Sta), - {{Is,[Val]},Stb} + Stc = restore_vars(Sta, Stb), + {{Is,[Val]},Stc} end, St0, Scs), OptVls = combine(lists:sort(combine(Vis))), {Vls,Sis,St2} = select_labels(OptVls, St1, [], []), @@ -2789,13 +2857,25 @@ test_cg(Test, Inverted, As0, Fail, St0) -> %% an externally generated failure label, LastFail. N.B. We do not %% know or care how the failure labels are used. -match_fmf(F, LastFail, St, [H]) -> - F(H, LastFail, St); +match_fmf(F, LastFail, St0, [H]) -> + {R,St1} = F(H, LastFail, St0), + {R,restore_vars(St0, St1)}; match_fmf(F, LastFail, St0, [H|T]) -> {Fail,St1} = new_label(St0), {R,St2} = F(H, Fail, St1), - {Rs,St3} = match_fmf(F, LastFail, St2, T), - {R ++ [{label,Fail}] ++ Rs,St3}. + St3 = restore_vars(St1, St2), + {Rs,St4} = match_fmf(F, LastFail, St3, T), + {R ++ [{label,Fail}] ++ Rs,St4}. + +%% restore_vars(PreviousState, CurrentSt) -> UpdatedCurrentState. +%% Restore variables to their previous state. When exiting a scope, +%% any substitutions that are no longer applicable will be +%% discarded. More importantly, when generating BEAM debug +%% information, variables bound to literal values will only appear in +%% `debug_line` instructions if they are still in scope. + +restore_vars(St0, St) -> + St#cg{vars=St0#cg.vars}. %% fail_context(State) -> {body | guard, FailureLabel}. %% Return an indication of which part of a function code is diff --git a/lib/compiler/src/beam_dict.erl b/lib/compiler/src/beam_dict.erl index 51f144e40802..c1dae0686b47 100644 --- a/lib/compiler/src/beam_dict.erl +++ b/lib/compiler/src/beam_dict.erl @@ -23,11 +23,12 @@ -moduledoc false. -export([new/0,opcode/2,highest_opcode/1, - atom/2,local/4,export/4,import/4, - string/2,lambda/3,literal/2,line/3,fname/2,type/2, - atom_table/1,local_table/1,export_table/1,import_table/1, - string_table/1,lambda_table/1,literal_table/1, - line_table/1,type_table/1]). + atom/2,local/4,export/4,import/4, + string/2,lambda/3,literal/2,line/3, + fname/2,type/2,debug_info/3, + atom_table/1,local_table/1,export_table/1,import_table/1, + string_table/1,lambda_table/1,literal_table/1, + line_table/1,type_table/1,debug_table/1]). -include("beam_types.hrl"). @@ -35,13 +36,16 @@ -type index() :: non_neg_integer(). +-type frame_size() :: 'none' | non_neg_integer(). +-type debug_info() :: {frame_size(), list()}. + -type atom_tab() :: #{atom() => index()}. -type import_tab() :: gb_trees:tree(mfa(), index()). -type fname_tab() :: #{Name :: term() => index()}. -type line_tab() :: #{{Fname :: index(), Line :: term()} => index()}. -type literal_tab() :: #{Literal :: term() => index()}. --type type_tab() :: #{ Type :: binary() => index()}. - +-type type_tab() :: #{Type :: binary() => index()}. +-type debug_tab() :: #{index() => debug_info()}. -type lambda_info() :: {label(),{index(),label(),non_neg_integer()}}. -type lambda_tab() :: {non_neg_integer(),[lambda_info()]}. -type wrapper() :: #{label() => index()}. @@ -58,6 +62,7 @@ literals = #{} :: literal_tab(), fnames = #{} :: fname_tab(), lines = #{} :: line_tab(), + debug = #{} :: debug_tab(), num_lines = 0 :: non_neg_integer(), %Number of line instructions exec_line = false :: boolean(), next_import = 0 :: non_neg_integer(), @@ -203,7 +208,7 @@ literal1(Key, #asm{literals=Tab0,next_literal=NextIndex}=Dict) -> %% Returns the index for a line instruction (adding information %% to the location information table). --spec line(list(), bdict(), 'line' | 'executable_line') -> +-spec line(list(), bdict(), 'line' | 'executable_line' | 'debug_line') -> {non_neg_integer(), bdict()}. line([], #asm{num_lines=N}=Dict, Instr) when is_atom(Instr) -> @@ -251,6 +256,14 @@ type(Type, #asm{types=Types0}=Dict) -> {Index, Dict#asm{types=Types}} end. +-spec debug_info(index(), debug_info(), bdict()) -> bdict(). + +debug_info(Index, DebugInfo, #asm{debug=DebugTab0}=Dict) + when is_integer(Index) -> + false = is_map_key(Index, DebugTab0), %Assertion. + DebugTab = DebugTab0#{Index => DebugInfo}, + Dict#asm{debug=DebugTab}. + %% Returns the atom table. %% atom_table(Dict, Encoding) -> {LastIndex,[Length,AtomString...]} -spec atom_table(bdict()) -> {non_neg_integer(), [[non_neg_integer(),...]]}. @@ -358,6 +371,12 @@ line_table(#asm{fnames=Fnames0,lines=Lines0, Lines = [L || {L,_} <:- Lines1], {NumLineInstrs,NumFnames,Fnames,NumLines,Lines,ExecLine}. + +-spec debug_table(bdict()) -> debug_tab(). + +debug_table(#asm{debug=Debug}) -> + Debug. + %% Search for binary string Str in the binary string pool Pool. %% old_string(Str, Pool) -> none | Index -spec old_string(binary(), binary()) -> 'none' | pos_integer(). diff --git a/lib/compiler/src/beam_disasm.erl b/lib/compiler/src/beam_disasm.erl index 7859ffe689ba..7d2e9cb33579 100644 --- a/lib/compiler/src/beam_disasm.erl +++ b/lib/compiler/src/beam_disasm.erl @@ -1299,6 +1299,13 @@ resolve_inst({bs_match,[{Fail,Ctx,{z,1},{u,_},Args}]},_,_,_) -> resolve_inst({executable_line,[Location,Index]},_,_,_) -> {executable_line,resolve_arg(Location),resolve_arg(Index)}; +%% +%% OTP 28. +%% + +resolve_inst({debug_line,[Location,Index,Live]},_,_,_) -> + {debug_line,resolve_arg(Location),resolve_arg(Index),resolve_arg(Live)}; + %% %% Catches instructions that are not yet handled. %% diff --git a/lib/compiler/src/beam_flatten.erl b/lib/compiler/src/beam_flatten.erl index a2011b8752b2..0eeb9bbe84a5 100644 --- a/lib/compiler/src/beam_flatten.erl +++ b/lib/compiler/src/beam_flatten.erl @@ -65,6 +65,7 @@ norm({set,[D],[S|Puts],{alloc,R,{put_map,Op,F}}}) -> norm({set,[],[],remove_message}) -> remove_message; norm({set,[],[],{line,_}=Line}) -> Line; norm({set,[],[],{executable_line,_,_}=Line}) -> Line; +norm({set,[],_,{debug_line,_,_,_,_}=Line}) -> Line; norm({set,[D1,D2],[D1,D2],swap}) -> {swap,D1,D2}. norm_allocate({_Zero,nostack,Nh,[]}, Regs) -> diff --git a/lib/compiler/src/beam_ssa_alias.erl b/lib/compiler/src/beam_ssa_alias.erl index e5e79b8e2eb4..26aaea87fc3d 100644 --- a/lib/compiler/src/beam_ssa_alias.erl +++ b/lib/compiler/src/beam_ssa_alias.erl @@ -610,6 +610,8 @@ aa_is([_I=#b_set{dst=Dst,op=Op,args=Args,anno=Anno0}|Is], SS0, {SS0, AAS0}; bs_test_tail -> {SS0, AAS0}; + debug_line -> + {SS0, AAS0}; executable_line -> {SS0, AAS0}; has_map_field -> diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl index e5ff7da4c239..917309ff87e6 100644 --- a/lib/compiler/src/beam_ssa_codegen.erl +++ b/lib/compiler/src/beam_ssa_codegen.erl @@ -23,6 +23,7 @@ -moduledoc false. -export([module/2]). +-export([is_original_variable/1]). %Called from beam_core_to_ssa. -export([classify_heap_need/2]). %Called from beam_ssa_pre_codegen. -export_type([ssa_register/0]). @@ -41,14 +42,16 @@ regs=#{} :: #{beam_ssa:b_var() => ssa_register()}, ultimate_fail=1 :: beam_label(), catches=gb_sets:empty() :: gb_sets:set(ssa_label()), - fc_label=1 :: beam_label() + fc_label=1 :: beam_label(), + debug_info=false :: boolean() }). -spec module(beam_ssa:b_module(), [compile:option()]) -> - {'ok',beam_asm:module_code()}. + {'ok',beam_asm:module_code()}. -module(#b_module{name=Mod,exports=Es,attributes=Attrs,body=Fs}, _Opts) -> - {Asm,St} = functions(Fs, {atom,Mod}), +module(#b_module{name=Mod,exports=Es,attributes=Attrs,body=Fs}, Opts) -> + DebugInfo = member(beam_debug_info, Opts), + {Asm,St} = functions(Fs, {atom,Mod}, DebugInfo), {ok,{Mod,Es,Attrs,Asm,St#cg.lcount}}. -record(need, {h=0 :: non_neg_integer(), % heap words @@ -109,12 +112,12 @@ module(#b_module{name=Mod,exports=Es,attributes=Attrs,body=Fs}, _Opts) -> -type ssa_register() :: xreg() | yreg() | freg() | zreg(). -functions(Forms, AtomMod) -> +functions(Forms, AtomMod, DebugInfo) -> mapfoldl(fun (F, St) -> function(F, AtomMod, St) end, - #cg{lcount=1}, Forms). + #cg{lcount=1,debug_info=DebugInfo}, Forms). -function(#b_function{anno=Anno,bs=Blocks}, AtomMod, St0) -> - #{func_info:={_,Name,Arity}} = Anno, +function(#b_function{anno=Anno,bs=Blocks,args=Args}, AtomMod, St0) -> + #{func_info := {_,Name,Arity}} = Anno, NoBsMatch = not maps:get(bs_ensure_opt, Anno, false), try assert_exception_block(Blocks), %Assertion. @@ -127,11 +130,12 @@ function(#b_function{anno=Anno,bs=Blocks}, AtomMod, St0) -> Labels = (St4#cg.labels)#{0=>Entry,?EXCEPTION_BLOCK=>0}, St5 = St4#cg{labels=Labels,used_labels=gb_sets:singleton(Entry), ultimate_fail=Ult}, - {Body,St} = cg_fun(Blocks, NoBsMatch, St5#cg{fc_label=Fi}), - Asm = [{label,Fi},line(Anno), - {func_info,AtomMod,{atom,Name},Arity}] ++ + {Body,St} = cg_fun(Blocks, Args, NoBsMatch, St5#cg{fc_label=Fi}), + Asm0 = [{label,Fi},line(Anno), + {func_info,AtomMod,{atom,Name},Arity}] ++ add_parameter_annos(Body, Anno) ++ [{label,Ult},if_end], + Asm = fix_debug_line(Asm0, Arity, St), Func = {function,Name,Arity,Entry,Asm}, {Func,St} catch @@ -140,6 +144,24 @@ function(#b_function{anno=Anno,bs=Blocks}, AtomMod, St0) -> erlang:raise(Class, Error, Stack) end. +fix_debug_line(Is0, Live, #cg{debug_info=true}) -> + case Is0 of + [{label,_}=FiLbl, + {line,_}=Li, + {func_info,_,_,_}=Fi, + {label,_}=Entry, + {debug_line,Location,Index,Live,{none,[]}}|Is] -> + %% Mark this debug_line instruction as being the + %% very first instruction in the function. + Args = [{{integer,I}, [{x,I-1}]} || I <- lists:seq(1, Live)], + DbgLine = {debug_line,Location,Index,Live,{entry,Args}}, + [FiLbl,Li,Fi,Entry,DbgLine|Is]; + _ -> + Is0 + end; +fix_debug_line(Is, _Arity, #cg{debug_info=false}) -> + Is. + assert_exception_block(Blocks) -> %% Assertion: ?EXCEPTION_BLOCK must be a call erlang:error(badarg). case Blocks of @@ -166,7 +188,7 @@ add_parameter_annos([{label, _}=Entry | Body], Anno) -> [Entry | sort(Annos)] ++ Body. -cg_fun(Blocks, NoBsMatch, St0) -> +cg_fun(Blocks, Args, NoBsMatch, St0) -> Linear0 = linearize(Blocks), St1 = collect_catch_labels(Linear0, St0), Linear1 = need_heap(Linear0), @@ -174,7 +196,8 @@ cg_fun(Blocks, NoBsMatch, St0) -> Linear3 = liveness(Linear2, St1), Linear4 = defined(Linear3, St1), Linear5 = opt_allocate(Linear4, St1), - Linear = fix_wait_timeout(Linear5), + Linear6 = fix_wait_timeout(Linear5), + Linear = add_debug_info(Linear6, Args, St1), {Asm,St} = cg_linear(Linear, St1), case NoBsMatch of true -> {Asm,St}; @@ -245,12 +268,17 @@ need_heap_never(_) -> need_heap_blks([{L,#cg_blk{is=Is0}=Blk0}|Bs], H0, Acc) -> {Is1,H1} = need_heap_is(reverse(Is0), H0, []), {Ns,H} = need_heap_terminator(Bs, L, H1), - Is = Ns ++ Is1, + Is = delay_alloc(Ns ++ Is1), Blk = Blk0#cg_blk{is=Is}, need_heap_blks(Bs, H, [{L,Blk}|Acc]); need_heap_blks([], H, Acc) -> {Acc,H}. +delay_alloc([#cg_alloc{}=AI, + #cg_set{op=debug_line}=ELI|Is2]) -> + [ELI|delay_alloc([AI|Is2])]; +delay_alloc(Is) -> Is. + need_heap_is([#cg_alloc{words=Words}=Alloc0|Is], N, Acc) -> Alloc = Alloc0#cg_alloc{words=add_heap_words(N, Words)}, need_heap_is(Is, #need{}, [Alloc|Acc]); @@ -390,6 +418,7 @@ classify_heap_need(build_stacktrace) -> gc; classify_heap_need(call) -> gc; classify_heap_need(catch_end) -> gc; classify_heap_need(copy) -> neutral; +classify_heap_need(debug_line) -> gc; classify_heap_need(executable_line) -> neutral; classify_heap_need(extract) -> gc; classify_heap_need(get_hd) -> neutral; @@ -689,6 +718,7 @@ need_live_anno(Op) -> bs_start_match -> true; bs_skip -> true; call -> true; + debug_line -> true; put_map -> true; update_record -> true; _ -> false @@ -816,6 +846,7 @@ need_y_init(#cg_set{op=bs_skip,args=[#b_literal{val=Type}|_]}) -> _ -> false end; need_y_init(#cg_set{op=bs_start_match}) -> true; +need_y_init(#cg_set{op=debug_line}) -> true; need_y_init(#cg_set{op=put_map}) -> true; need_y_init(#cg_set{op=update_record}) -> true; need_y_init(#cg_set{}) -> false. @@ -955,6 +986,246 @@ fix_wait_timeout_is([I|Is], Acc) -> fix_wait_timeout_is(Is, [I|Acc]); fix_wait_timeout_is([], _Acc) -> no. +%%% +%%% Gather debug information and add as annotations to `debug_line` +%%% instructions. +%%% +%%% This pass is run when collection of BEAM debug information has +%%% been requested. +%%% + +add_debug_info(Linear0, Args, #cg{regs=Regs,debug_info=true}) -> + Def0 = ordsets:from_list(Args), + Linear = anno_defined_regs(Linear0, Def0, Regs), + FrameSzMap = #{0 => none}, + VarMap = #{}, + add_debug_info_blk(Linear, Regs, FrameSzMap, VarMap); +add_debug_info(Linear, _Args, #cg{debug_info=false}) -> + Linear. + +add_debug_info_blk([{L,#cg_blk{is=Is0,last=Last}=Blk0}|Bs], + Regs, FrameSzMap0, VarMap0) -> + FrameSize0 = map_get(L, FrameSzMap0), + {Is,VarMap,FrameSize} = + add_debug_info_is(Is0, Regs, FrameSize0, VarMap0, []), + Successors = successors(Last), + FrameSzMap = foldl(fun(Succ, Acc) -> + Acc#{Succ => FrameSize} + end, FrameSzMap0, Successors), + Blk = Blk0#cg_blk{is=Is}, + [{L,Blk}|add_debug_info_blk(Bs, Regs, FrameSzMap, VarMap)]; +add_debug_info_blk([], _Regs, _FrameSzMap, _VarMap) -> + []. + +add_debug_info_is([#cg_alloc{stack=FrameSize}=I|Is], + Regs, FrameSize0, VarMap, Acc) -> + if + is_integer(FrameSize) -> + add_debug_info_is(Is, Regs, FrameSize, VarMap, [I|Acc]); + true -> + add_debug_info_is(Is, Regs, FrameSize0, VarMap, [I|Acc]) + end; +add_debug_info_is([#cg_set{anno=#{was_phi := true},op=copy}=I|Is], + Regs, FrameSize, VarMap, Acc) -> + %% This copy operation originates from a phi node. The source and + %% destination are not equivalent and must not be added to VarMap. + add_debug_info_is(Is, Regs, FrameSize, VarMap, [I|Acc]); +add_debug_info_is([#cg_set{anno=Anno,op=copy,dst=#b_var{name=Dst}, + args=[#b_var{name=Src}]}=I|Is], + Regs, FrameSize, VarMap0, Acc) -> + VarMap = case Anno of + #{delayed_yreg_copy := true} -> + VarMap0#{Src => [Dst]}; + #{} -> + VarMap0#{Dst => [Src]} + end, + add_debug_info_is(Is, Regs, FrameSize, VarMap, [I|Acc]); +add_debug_info_is([#cg_set{anno=Anno0,op=debug_line,args=[Index]}=I0|Is], + Regs, FrameSize, VarMap, Acc) -> + #{def_regs := DefRegs, + alias := Alias, + literals := Literals0, + live := NumLive0} = Anno0, + AliasMap = maps:merge_with(fun(_, L1, L2) -> L1 ++ L2 end, + VarMap, Alias), + Literals1 = [{get_original_names(#b_var{name=Var}, AliasMap),Val} || + {Val,Var} <:- Literals0], + Literals = [{hd(Vars),[{literal,Val}]} || + {Vars,Val} <:- Literals1, Vars =/= []], + RegVarMap = [{map_get(V, Regs),get_original_names(V, AliasMap)} || + V <- DefRegs, + not is_beam_register(V)], + S0 = sofs:family(RegVarMap, [{reg,[variable]}]), + S1 = sofs:family_to_relation(S0), + S2 = sofs:converse(S1), + S3 = sofs:relation_to_family(S2), + S = sort(Literals ++ sofs:to_external(S3)), + Live = max(NumLive0, num_live(DefRegs, Regs)), + Info = {FrameSize,S}, + I = I0#cg_set{args=[Index,#b_literal{val=Live},#b_literal{val=Info}]}, + add_debug_info_is(Is, Regs, FrameSize, VarMap, [I|Acc]); +add_debug_info_is([#cg_set{}=I|Is], Regs, FrameSize, VarMap, Acc) -> + add_debug_info_is(Is, Regs, FrameSize, VarMap, [I|Acc]); +add_debug_info_is([], _Regs, FrameSize, VarMap, Info) -> + {reverse(Info),VarMap,FrameSize}. + +get_original_names(#b_var{name=Name}, AliasMap) -> + get_original_names_1([Name], AliasMap, sets:new()). + +get_original_names_1([Name|Names0], AliasMap, Seen0) -> + case sets:is_element(Name, Seen0) of + true -> + get_original_names_1(Names0, AliasMap, Seen0); + false -> + Seen = sets:add_element(Name, Seen0), + Names = case AliasMap of + #{Name := Vars0} -> + Vars = Vars0 ++ Names0, + get_original_names_1(Vars, AliasMap, Seen); + #{} -> + Names0 + end, + case is_original_variable(Name) of + true -> + [Name|get_original_names_1(Names, AliasMap, Seen)]; + false -> + get_original_names_1(Names, AliasMap, Seen) + end + end; +get_original_names_1([], _, _) -> + []. + +-spec is_original_variable(Name) -> boolean() when + Name :: non_neg_integer() | atom(). + +%% Test whether the variable name originates from the Erlang source +%% code, meaning that it was not invented by the compiler. It is +%% sufficient to check that the first character can legally start an +%% Erlang variable name, because all new variables inserted by the +%% compiler always start with an invalid character such as "@" or a +%% lower-case letter. +is_original_variable(Name) when is_atom(Name) -> + <> = atom_to_binary(Name), + if + %% A variable name must start with "_" or an upper-case letter + %% included in ISO Latin-1. + C =:= $_ -> true; + $A =< C, C =< $Z -> true; + $À =< C, C =< $Þ, C =/= $× -> true; + true -> false + end; +is_original_variable(Name) when is_integer(Name) -> + false. + +%%% +%%% Annotate `debug_line` instructions with all variables that have +%%% been defined and are still available in a BEAM register. +%%% + +anno_defined_regs(Linear, Def, Regs) -> + def_regs(Linear, #{0 => Def}, Regs). + +def_regs([{L,#cg_blk{is=Is0,last=Last}=Blk0}|Bs], DefMap0, Regs) -> + Def0 = map_get(L, DefMap0), + {Is,Def,MaybeDef} = def_regs_is(Is0, Regs, Def0, []), + DefMap = def_successors(Last, Def, MaybeDef, DefMap0), + Blk = Blk0#cg_blk{is=Is}, + [{L,Blk}|def_regs(Bs, DefMap, Regs)]; +def_regs([], _, _) -> []. + +def_regs_is([#cg_alloc{live=Live}=I|Is], Regs, Def0, Acc) when is_integer(Live) -> + Def = trim_xregs(Def0, Live, Regs), + def_regs_is(Is, Regs, Def, [I|Acc]); +def_regs_is([#cg_set{op=succeeded,args=[Var]}=I], _Regs, Def, Acc) -> + %% Var will only be defined on the success branch of the `br` + %% for this block. + MaybeDef = [Var], + {reverse(Acc, [I]),Def,MaybeDef}; +def_regs_is([#cg_set{op=kill_try_tag,args=[#b_var{}=Tag]}=I|Is], Regs, Def0, Acc) -> + Def = ordsets:del_element(Tag, Def0), + def_regs_is(Is, Regs, Def, [I|Acc]); +def_regs_is([#cg_set{op=catch_end,dst=Dst,args=[#b_var{}=Tag|_]}=I|Is], Regs, Def0, Acc) -> + Def1 = trim_xregs(Def0, 0, Regs), + Def2 = kill_regs(Def1, [Dst,Tag], Regs), + Def = ordsets:add_element(Dst, Def2), + def_regs_is(Is, Regs, Def, [I|Acc]); +def_regs_is([#cg_set{anno=Anno0,op=debug_line}=I0|Is], Regs, Def, Acc) -> + Anno = Anno0#{def_regs => Def}, + I = I0#cg_set{anno=Anno}, + def_regs_is(Is, Regs, Def, [I|Acc]); +def_regs_is([#cg_set{anno=Anno,dst=Dst,op={bif,Bif},args=Args}=I|Is], Regs, Def0, Acc) -> + Def1 = case is_gc_bif(Bif, Args) of + true -> + #{live := Live} = Anno, + trim_xregs(Def0, Live, Regs); + false -> + Def0 + end, + case Regs of + #{Dst := {Tag,_}=R} when Tag =:= x; Tag =:= y -> + Def2 = kill_reg(Def1, R, Regs), + Def = ordsets:add_element(Dst, Def2), + def_regs_is(Is, Regs, Def, [I|Acc]); + #{} -> + def_regs_is(Is, Regs, Def1, [I|Acc]) + end; +def_regs_is([#cg_set{anno=Anno,dst=Dst}=I|Is], Regs, Def0, Acc) -> + Def1 = case Anno of + #{live := Live} -> trim_xregs(Def0, Live, Regs); + #{} -> Def0 + end, + Def2 = case Anno of + #{kill_yregs := KillYregs} -> + kill_regs(Def1, KillYregs, Regs); + #{} -> + Def1 + end, + case Anno of + #{clobbers := true} -> + Def3 = trim_xregs(Def2, 0, Regs), + Def = case Regs of + #{Dst := {Tag,_}=R} when Tag =:= x; Tag =:= y -> + Def4 = kill_reg(Def3, R, Regs), + ordsets:add_element(Dst, Def4); + #{} -> + Def3 + end, + def_regs_is(Is, Regs, Def, [I|Acc]); + #{} -> + case Regs of + #{Dst := {Tag,_}=R} when Tag =:= x; Tag =:= y -> + Def3 = kill_reg(Def2, R, Regs), + Def = ordsets:add_element(Dst, Def3), + def_regs_is(Is, Regs, Def, [I|Acc]); + #{} -> + def_regs_is(Is, Regs, Def1, [I|Acc]) + end + end; +def_regs_is([], _Regs, Def, Acc) -> + {reverse(Acc),Def,[]}. + +trim_xregs([V|Vs], Live, Regs) -> + case Regs of + #{V := {x,R}} when R < Live -> + [V|trim_xregs(Vs, Live, Regs)]; + #{V := {y,_}}-> + [V|trim_xregs(Vs, Live, Regs)]; + #{} -> + trim_xregs(Vs, Live, Regs) + end; +trim_xregs([], _, _) -> []. + +kill_reg([V|Vs], R, Regs) -> + case Regs of + #{V := R} -> Vs; + #{} -> [V|kill_reg(Vs, R, Regs)] + end; +kill_reg([], _, _) -> []. + +kill_regs(Defs, KillRegs0, Regs) -> + KillRegs = #{map_get(V, Regs) => [] || V <- KillRegs0}, + [D || D <- Defs, not is_map_key(map_get(D, Regs), KillRegs)]. + %%% %%% Here follows the main code generation functions. %%% @@ -1822,6 +2093,14 @@ cg_instr(bs_get_position, [Ctx], Dst, Set) -> cg_instr(executable_line, [{integer,Index}], _Dst, #cg_set{anno=Anno}) -> {line,Location} = line(Anno), [{executable_line,Location,Index}]; +cg_instr(debug_line, [{integer,Index},{integer,Live},{literal,Info}], + _Dst, #cg_set{anno=Anno}) -> + case line(Anno) of + {line,[]} -> + []; + {line,Location} -> + [{debug_line,Location,Index,Live,Info}] + end; cg_instr(put_map, [{atom,assoc},SrcMap|Ss], Dst, Set) -> Live = get_live(Set), [{put_map_assoc,{f,0},SrcMap,Dst,Live,{list,Ss}}]; @@ -2133,9 +2412,10 @@ translate_phis(L, #cg_br{succ=Target,fail=Target}, Blocks) -> end; translate_phis(_, _, _) -> []. -phi_copies([#b_set{dst=Dst,args=PhiArgs}|Sets], L) -> - CopyArgs = [V || {V,Target} <:- PhiArgs, Target =:= L], - [#cg_set{op=copy,dst=Dst,args=CopyArgs}|phi_copies(Sets, L)]; +phi_copies([#b_set{anno=Anno0,dst=Dst,args=PhiArgs}|Sets], L) -> + CopyArgs = [V || {V,Target} <- PhiArgs, Target =:= L], + Anno = Anno0#{was_phi => true}, + [#cg_set{anno=Anno,op=copy,dst=Dst,args=CopyArgs}|phi_copies(Sets, L)]; phi_copies([], _) -> []. %% opt_move_to_x0([Instruction]) -> [Instruction]. diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl index 356d2ffdd706..ec5998d0908d 100644 --- a/lib/compiler/src/beam_ssa_pre_codegen.erl +++ b/lib/compiler/src/beam_ssa_pre_codegen.erl @@ -114,6 +114,8 @@ functions([], _Ps) -> []. passes(Opts) -> AddPrecgAnnos = proplists:get_bool(dprecg, Opts), + BeamDebugInfo = proplists:get_bool(beam_debug_info, Opts), + Ps = [?PASS(assert_no_critical_edges), %% Preliminaries. @@ -121,6 +123,12 @@ passes(Opts) -> ?PASS(sanitize), ?PASS(expand_match_fail), ?PASS(expand_update_tuple), + + case BeamDebugInfo of + false -> ignore; + true -> ?PASS(break_out_debug_line) + end, + ?PASS(place_frames), ?PASS(fix_receives), @@ -820,6 +828,21 @@ sanitize_is([], Last, _InBlocks, _Blocks, Count, Values, Changed, Acc) -> no_change end. +do_sanitize_is(#b_set{anno=Anno0,op=debug_line,args=Args0}=I0, + Is, Last, InBlocks, Blocks, Count, Values, _Changed0, Acc) -> + Args = sanitize_args(Args0, Values), + #{alias := Alias0, literals := Literals0} = Anno0, + Alias = sanitize_alias(Alias0, Values), + Anno1 = Anno0#{alias := Alias}, + Anno = case [{Val,From} || #b_var{name=From} := #b_literal{val=Val} <- Values] of + [] -> + Anno1; + [_|_]=Literals -> + Anno1#{literals => Literals ++ Literals0} + end, + I = I0#b_set{anno=Anno,args=Args}, + Changed = true, + sanitize_is(Is, Last, InBlocks, Blocks, Count, Values, Changed, [I|Acc]); do_sanitize_is(#b_set{op=Op,dst=Dst,args=Args0}=I0, Is, Last, InBlocks, Blocks, Count, Values, Changed0, Acc) -> Args = sanitize_args(Args0, Values), @@ -853,6 +876,19 @@ sanitize_last(#b_blk{last=Last0}=Blk, Values) -> Blk end. +sanitize_alias(Alias, Values) -> + sanitize_alias_1(maps:keys(Alias), Values, Alias). + +sanitize_alias_1([Old|Vs], Values, Alias0) -> + Alias = case Values of + #{#b_var{name=Old} := #b_var{name=New}} -> + Alias0#{New => map_get(Old, Alias0)}; + #{} -> + Alias0 + end, + sanitize_alias_1(Vs, Values, Alias); +sanitize_alias_1([], _Values, Alias) -> Alias. + sanitize_args(Args, Values) -> [sanitize_arg(Arg, Values) || Arg <- Args]. @@ -1191,6 +1227,91 @@ sort_update_tuple([#b_literal{}=Index, Value | Updates], Acc) -> sort_update_tuple([], Acc) -> append([[Index, Value] || {Index, Value} <:- sort(fun erlang:'>='/2, Acc)]). + +%%% +%%% Avoid placing stack frame allocation instructions before an +%%% `debug_line` instruction to potentially provide information for +%%% more variables. +%%% +%%% This sub pass is only run when the `beam_debug_info` option has been given. +%%% +%%% As an example, consider this function: +%%% +%%% foo(A, B, C) -> +%%% {ok,bar(A),B}. +%%% +%%% When compiled with the `beam_debug_info` option the first part of the SSA code +%%% will look like this: +%%% +%%% 0: +%%% _7 = debug_line `1` +%%% _3 = call (`bar`/1), _0 +%%% +%%% The beam_ssa_pre_codegen pass will place a stack frame before the block: +%%% +%%% %% #{frame_size => 1,yregs => #{{b_var,1} => []}} +%%% 0: +%%% [1] y0/_12 = copy x1/_1 +%%% [3] z0/_7 = debug_line `1` +%%% +%%% In the resulting BEAM code there will not be any information for +%%% variable `C`, because the allocate instruction will kill it before +%%% reaching the `debug_line` instruction: +%%% +%%% {allocate,1,2}. +%%% {move,{x,1},{y,0}}. +%%% {debug_line,[{location,...}],1, +%%% {1,[{'A',[{x,0}]},{'B',[{x,1},{y,0}]}]}, +%%% 2}. +%%% +%%% If we split the block after the `debug_line` instruction, the +%%% allocation of the stack frame will be placed after the +%%% `debug_line` instruction: +%%% +%%% 0: +%%% [1] z0/_7 = debug_line `1` +%%% [3] br ^12 +%%% +%%% %% #{frame_size => 1,yregs => #{{b_var,1} => []}} +%%% 12: +%%% [5] y0/_13 = copy x1/_1 +%%% [7] x0/_3 = call (`bar`/1), x0/_0 +%%% +%%% In the resulting BEAM code, there will now be information for variable `C`: +%%% +%%% {debug_line,[{location,"t.erl",5}], +%%% 1, +%%% {none,[{'A',[{x,0}]},{'B',[{x,1}]},{'C',[{x,2}]}]}, +%%% 2}. +%%% {allocate,1,2}. +%%% + +break_out_debug_line(#st{ssa=Blocks0,cnt=Count0}=St) -> + RPO = beam_ssa:rpo(Blocks0), + + %% Calculate the set of all indices for `debug_line` instructions + %% that occur as the first instruction in a block. Splitting after + %% every `debug_line` instruction is not always beneficial, and + %% can even result in worse information about variables. + F = fun(_, #b_blk{is=[#b_set{op=debug_line, + args=[#b_literal{val=Index}]}|_]}, Acc) -> + sets:add_element(Index, Acc); + (_, _, Acc) -> + Acc + end, + ToBeSplit = beam_ssa:fold_blocks(F, RPO, sets:new(), Blocks0), + + %% Now split blocks after the found `debug_line` instructions that + %% are known to start blocks. + P = fun(#b_set{op=debug_line,args=[#b_literal{val=Index}]}) -> + sets:is_element(Index, ToBeSplit); + (_) -> + false + end, + {Blocks,Count} = beam_ssa:split_blocks_after(RPO, P, Blocks0, Count0), + + St#st{ssa=Blocks,cnt=Count}. + %%% %%% Find out where frames should be placed. %%% @@ -1985,7 +2106,8 @@ copy_retval_is([#b_set{op=call,dst=#b_var{}=Dst}=I0|Is], RC, Yregs, case sets:is_element(Dst, Yregs) of true -> {NewVar,Count} = new_var(Count1), - Copy = #b_set{op=copy,dst=Dst,args=[NewVar]}, + Copy = #b_set{anno=#{delayed_yreg_copy => true}, + op=copy,dst=Dst,args=[NewVar]}, I = I1#b_set{dst=NewVar}, copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc]); false -> @@ -2615,6 +2737,7 @@ use_zreg(bs_checked_skip) -> yes; use_zreg(bs_ensure) -> yes; use_zreg(bs_match_string) -> yes; use_zreg(bs_set_position) -> yes; +use_zreg(debug_line) -> yes; use_zreg(executable_line) -> yes; use_zreg(kill_try_tag) -> yes; use_zreg(landingpad) -> yes; diff --git a/lib/compiler/src/beam_trim.erl b/lib/compiler/src/beam_trim.erl index 3e013d8ce143..04db35a6952d 100644 --- a/lib/compiler/src/beam_trim.erl +++ b/lib/compiler/src/beam_trim.erl @@ -304,6 +304,12 @@ remap([return|_]=Is, _) -> remap([{line,_}=I|Is], Remap) -> [I|remap(Is, Remap)]. +remap_block([{set,[],Ss0,{debug_line,_,_,_,_}=Info0}|Is], Remap) -> + Ss = remap_args(Ss0, Remap), + {debug_line,Loc,Index,Live,DebugInfo0} = Info0, + DebugInfo = remap_debug_info(DebugInfo0, Remap), + Info = {debug_line,Loc,Index,Live,DebugInfo}, + [{set,[],Ss,Info}|remap_block(Is, Remap)]; remap_block([{set,[{x,_}]=Ds,Ss0,Info}|Is], Remap) -> Ss = remap_args(Ss0, Remap), [{set,Ds,Ss,Info}|remap_block(Is, Remap)]; @@ -313,6 +319,12 @@ remap_block([{set,Ds0,Ss0,Info}|Is], Remap) -> [{set,Ds,Ss,Info}|remap_block(Is, Remap)]; remap_block([], _) -> []. +remap_debug_info({FrameSize0,Vars0}, {Trim,Map}) -> + FrameSize = FrameSize0 - Trim, + Vars = [{Name,[remap_arg(Arg, Trim, Map) || Arg <- Args]} || + {Name,Args} <- Vars0], + {FrameSize,Vars}. + remap_args(Args, {Trim,Map}) -> [remap_arg(Arg, Trim, Map) || Arg <- Args]. @@ -373,6 +385,8 @@ is_safe_label([{call_ext,_,{extfunc,M,F,A}}|_]) -> erl_bifs:is_exit_bif(M, F, A); is_safe_label(_) -> false. +is_safe_label_block([{set,[],_,{debug_line,_,_,_,_}}|_]) -> + false; is_safe_label_block([{set,Ds,Ss,_}|Is]) -> IsYreg = fun(#tr{r={y,_}}) -> true; ({y,_}) -> true; diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl index 9d7d202caf2d..44c75483f251 100644 --- a/lib/compiler/src/beam_validator.erl +++ b/lib/compiler/src/beam_validator.erl @@ -375,8 +375,11 @@ vi({'%',_}, Vst) -> Vst; vi({line,_}, Vst) -> Vst; -vi({executable_line,_,_}, Vst) -> +vi({executable_line,_,Index}, Vst) when is_integer(Index) -> Vst; +vi({debug_line,_,Index,Live,Info}, Vst) when is_integer(Index), + is_integer(Live) -> + validate_debug_line(Info, Live, Vst); vi(nif_start, Vst) -> Vst; %% @@ -2157,6 +2160,44 @@ validate_select_tuple_arity(Fail, [], _, #vst{}=Vst) -> kill_state(SuccVst) end). +%% +%% Validate debug information in `debug_line` instructions. +%% + +validate_debug_line({entry,Args}, Live, Vst) -> + do_validate_debug_line(none, Live, Vst), + _ = [get_term_type(Reg, Vst) || {_Name,[Reg]} <:- Args], + prune_x_regs(Live, Vst); +validate_debug_line({Stk,Vars}, Live, Vst0) -> + do_validate_debug_line(Stk, Live, Vst0), + Vst = prune_x_regs(Live, Vst0), + _ = [validate_dbg_vars(Regs, Name, Vst) || {Name,Regs} <:- Vars], + Vst. + +do_validate_debug_line(ExpectedStk, Live, #vst{current=St}=Vst) -> + case St of + #st{numy=ExpectedStk} -> + ok; + #st{numy=ActualStk} -> + error({beam_debug_info,frame_size,ExpectedStk,actual,ActualStk}) + end, + verify_live(Live, Vst), + verify_y_init(Vst). + +validate_dbg_vars([R|Rs], Name, Vst) -> + Type = get_term_type(R, Vst), + validate_dbg_vars(Rs, Type, Name, Vst). + +validate_dbg_vars([R|Rs], Type, Name, Vst) -> + case get_term_type(R, Vst) of + Type -> + validate_dbg_vars(Rs, Type, Name, Vst); + OtherType -> + error({type_mismatch,Name,OtherType,Type}) + end; +validate_dbg_vars([], _Type, _Name, _Vst) -> + ok. + %% %% Infers types from comparisons, looking at the expressions that produced the %% compared values and updates their types if we've learned something new from diff --git a/lib/compiler/src/beam_z.erl b/lib/compiler/src/beam_z.erl index b750457e3dde..3caef453f4b0 100644 --- a/lib/compiler/src/beam_z.erl +++ b/lib/compiler/src/beam_z.erl @@ -123,6 +123,8 @@ undo_rename(I) -> I. remove_redundant_lines(Is) -> remove_redundant_lines_1(Is, none). +remove_redundant_lines_1([{debug_line,_,_,_,_}=I|Is], _PrevLoc) -> + [I|remove_redundant_lines_1(Is, none)]; remove_redundant_lines_1([{executable_line,_,_}=I|Is], _PrevLoc) -> [I|remove_redundant_lines_1(Is, none)]; remove_redundant_lines_1([{line,Loc}=I|Is], PrevLoc) -> diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index 71564f9d0f10..701b6cb13c57 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -1080,6 +1080,9 @@ expand_opt(r26, Os) -> [no_bsm_opt | expand_opt(r27, Os)]; expand_opt(r27, Os) -> [no_long_atoms, compressed_literals | Os]; +expand_opt(beam_debug_info, Os) -> + [beam_debug_info, no_copt, no_bsm_opt, no_bool_opt, + no_share_opt, no_recv_opt, no_ssa_opt, no_throw_opt | Os]; expand_opt({debug_info_key,_}=O, Os) -> [encrypt_debug_info,O|Os]; expand_opt(no_type_opt=O, Os) -> @@ -1676,7 +1679,8 @@ abstr_passes(AbstrStatus) -> {delay,[{iff,debug_info,?pass(save_abstract_code)}]}, - {delay,[{iff,line_coverage,{pass,sys_coverage}}]}, + {delay,[{iff,line_coverage,{pass,sys_coverage}}, + {iff,beam_debug_info,?pass(beam_debug_info)}]}, {iff,'dcover',{src_listing,"cover"}}, ?pass(expand_records), @@ -1696,6 +1700,7 @@ core_passes(CoreStatus) -> case CoreStatus of non_verified_core -> [?pass(core_lint_module), + ?pass(core_compile_directives), {unless,no_core_prepare,{pass,sys_core_prepare}}, {iff,dprep,{listing,"prepare"}}]; verified_core -> @@ -2359,9 +2364,18 @@ legalize_vars(Code0, St) -> end, Code0), {ok,Code,St}. -compile_directives(Forms, #compile{options=Opts0}=St0) -> - Opts1 = expand_opts(flatten([C || {attribute,_,compile,C} <- Forms])), - Opts = Opts1 ++ Opts0, +compile_directives(Forms, St) -> + Opts = [C || {attribute,_,compile,C} <- Forms], + compile_directives_1(Opts, Forms, St). + +core_compile_directives(Core, St) -> + Attrs = [{cerl:concrete(Name),cerl:concrete(Value)} || + {Name,Value} <:- cerl:module_attrs(Core)], + Opts = [C || {compile,C} <- Attrs], + compile_directives_1(Opts, Core, St). + +compile_directives_1(Opts1, Forms, #compile{options=Opts0}=St0) -> + Opts = expand_opts(flatten(Opts1)) ++ Opts0, St1 = St0#compile{options=Opts}, case any_obsolete_option(Opts) of {yes,Opt} -> @@ -2503,6 +2517,10 @@ debug_info(#compile{module=Module,ofile=OFile}=St) -> {ok,DebugInfo,Opts2} end. +beam_debug_info(Code0, #compile{}=St) -> + {ok,Code} = sys_coverage:beam_debug_info(Code0), + {ok,Code,St}. + debug_info_chunk(#compile{mod_options=ModOpts0, options=CompOpts, abstract_code=Abst}) -> @@ -2825,7 +2843,34 @@ do_src_listing(Lf, Fs) -> foreach(fun (F) -> io:put_chars(Lf, [erl_pp:form(F, Opts),"\n"]) end, Fs). -listing(Ext, Code, St0) -> +listing(Ext, Code0, St0) -> + Code = maybe + %% Ensure that a pretty-printed Core Erlang module + %% compiled with the `beam_debug_info` option can be + %% compiled. + true ?= cerl:is_c_module(Code0), + true ?= lists:member(beam_debug_info, St0#compile.options), + + %% First check whether the `beam_debug_info` option is + %% already present. + Attrs0 = cerl:module_attrs(Code0), + Opts0 = [{cerl:concrete(Name),cerl:concrete(Value)} || + {Name,Value} <:- Attrs0], + Opts = [Opt || {compile,Opts} <- Opts0, + Opt <- lists:flatten([Opts])], + false ?= lists:member(beam_debug_info, Opts), + + %% Add a `-compile(beam_debug_info)` attribute. + Compile = {cerl:abstract(compile), + cerl:abstract(beam_debug_info)}, + Attrs = [Compile|Attrs0], + cerl:update_c_module(Code0, cerl:module_name(Code0), + cerl:module_exports(Code0), + Attrs, cerl:module_defs(Code0)) + else + _ -> + Code0 + end, St = St0#compile{encoding = none}, listing(fun(Lf, Fs) -> beam_listing:module(Lf, Fs) end, Ext, Code, St). diff --git a/lib/compiler/src/core_pp.erl b/lib/compiler/src/core_pp.erl index 12fd92855e57..018b5866551c 100644 --- a/lib/compiler/src/core_pp.erl +++ b/lib/compiler/src/core_pp.erl @@ -62,7 +62,11 @@ maybe_anno(Node, Fun, #ctxt{clean=true}=Ctxt) -> maybe_anno(Node, Fun, Ctxt, As0); Line -> As = strip_line(As0), - if Line > Ctxt#ctxt.line -> + NeedsAnno = needs_line_anno(Node), + if + NeedsAnno -> + maybe_anno(Node, Fun, Ctxt, As0); + Line > Ctxt#ctxt.line -> [io_lib:format("%% Line ~w",[Line]), nl_indent(Ctxt), maybe_anno(Node, Fun, Ctxt#ctxt{line = Line}, As) @@ -72,6 +76,14 @@ maybe_anno(Node, Fun, #ctxt{clean=true}=Ctxt) -> end end. +needs_line_anno(Node) -> + case (cerl:is_c_primop(Node) andalso + cerl:concrete(cerl:primop_name(Node))) of + debug_line -> true; + executable_line -> true; + _ -> false + end. + maybe_anno(Node, Fun, Ctxt, []) -> Fun(Node, Ctxt); maybe_anno(Node, Fun, Ctxt, List) -> diff --git a/lib/compiler/src/genop.tab b/lib/compiler/src/genop.tab index fdf038c29dcb..ab1116dd5946 100755 --- a/lib/compiler/src/genop.tab +++ b/lib/compiler/src/genop.tab @@ -695,3 +695,9 @@ BEAM_FORMAT_NUMBER=0 ## @spec executable_line Location Index ## @doc Provide location for an executable line. 183: executable_line/2 + +# OTP 28 + +## @spec debug_line Location Index Live +## @doc Provide location for a place where a break point can be placed. +184: debug_line/3 diff --git a/lib/compiler/src/sys_coverage.erl b/lib/compiler/src/sys_coverage.erl index 653aa97495db..2b00f5cd3507 100644 --- a/lib/compiler/src/sys_coverage.erl +++ b/lib/compiler/src/sys_coverage.erl @@ -21,8 +21,8 @@ -module(sys_coverage). -moduledoc false. --export([module/2,cover_transform/2]). --import(lists, [member/2,reverse/1,reverse/2]). +-export([module/2,cover_transform/2,beam_debug_info/1]). +-import(lists, [duplicate/2,member/2,reverse/1,reverse/2]). -type attribute() :: atom(). -type form() :: {function, integer(), atom(), arity(), _} @@ -34,16 +34,8 @@ -spec module([form()], [compile:option()]) -> {'ok',[form()]}. -module(Forms0, _Opts) when is_list(Forms0) -> - put(executable_line_index, 1), - GetIndex = fun(_, _, _, _, _) -> - Index = get(executable_line_index), - put(executable_line_index, Index + 1), - Index - end, - Forms = transform(Forms0, GetIndex), - erase(executable_line_index), - Forms. +module(Forms, _Opts) when is_list(Forms) -> + transform(Forms, executable_line). %% Undocumented helper function for the `cover` module. -spec cover_transform([form()], index_fun()) -> @@ -51,7 +43,14 @@ module(Forms0, _Opts) when is_list(Forms0) -> cover_transform(Forms, IndexFun) when is_list(Forms), is_function(IndexFun, 5) -> - transform(Forms, IndexFun). + transform(Forms, IndexFun, executable_line). + +%% Undocumented helper function for inserting `debug_line` instructions. + +-spec beam_debug_info([form()]) -> {'ok',[form()]}. + +beam_debug_info(Forms) when is_list(Forms) -> + transform(Forms, debug_line). %%% %%% Local functions. @@ -66,7 +65,8 @@ cover_transform(Forms, IndexFun) when is_list(Forms), true -> ?BLOCK(Expr) end). --define(EXECUTABLE_LINE, executable_line). + +-type bump_instruction() :: 'executable_line' | 'debug_line'. -record(vars, {module=[] :: module() | [], @@ -76,11 +76,23 @@ cover_transform(Forms, IndexFun) when is_list(Forms), lines=[] :: [non_neg_integer()], bump_lines=[] :: [non_neg_integer()], in_guard=false :: boolean(), - index_fun :: index_fun() + index_fun :: index_fun(), + bump_instr :: bump_instruction() }). -transform(Code, IndexFun) -> - Vars = #vars{index_fun=IndexFun}, +transform(Forms, BumpInstr) -> + put(bump_index, 1), + GetIndex = fun(_, _, _, _, _) -> + Index = get(bump_index), + put(bump_index, Index + 1), + Index + end, + Result = transform(Forms, GetIndex, BumpInstr), + erase(bump_index), + Result. + +transform(Code, IndexFun, BumpInstr) -> + Vars = #vars{index_fun=IndexFun,bump_instr=BumpInstr}, transform(Code, [], Vars, none, on). transform([Form0|Forms], MungedForms, Vars0, MainFile0, Switch0) -> @@ -208,6 +220,26 @@ munge({attribute,_,file,{File,_}}=Form, Vars, none, _) -> {Form,Vars,File,on}; munge({attribute,_,module,Mod}=Form, Vars, MainFile, Switch) when is_atom(Mod) -> {Form,Vars#vars{module=Mod},MainFile,Switch}; +munge({function,Anno,Function,Arity,Clauses0}, + #vars{bump_instr=debug_line}=Vars0, _MainFile, on) -> + %% We want to insert a `debug_line` instruction at the beginning + %% of the function before all clauses, but there is not really a + %% way to express that. So we insert an extra clause before all + %% other clauses. The v3_core pass will pick up the annotation and + %% index from the `debug_line` instruction in the body of the + %% clause and will then discard it. + Vars = Vars0#vars{function=Function, + arity=Arity, + clause=1, + lines=[], + bump_lines=[]}, + Args = duplicate(Arity, {var,Anno,'_'}), + FakeBif = {remote,Anno,{atom,Anno,fake},{atom,Anno,is_beam_bif_info}}, + Gs = [[{call,Anno,FakeBif,[]}]], + Body = [{atom,Anno,ignore}], + Clauses = [{clause,Anno,Args,Gs,Body}|Clauses0], + MungedClauses = munge_fun_clauses(Clauses, Vars), + {{function,Anno,Function,Arity,MungedClauses},Vars,on}; munge({function,Anno,Function,Arity,Clauses}, Vars0, _MainFile, on) -> Vars = Vars0#vars{function=Function, arity=Arity, @@ -367,7 +399,7 @@ fix_expr(E, _Line, _Bump) -> fix_clauses([], _Line, _Bump) -> []; fix_clauses(Cs, Line, Bump) -> - case bumps_line(lists:last(Cs), Line) of + case bumps_line(lists:last(Cs), Line, Bump) of true -> fix_cls(Cs, Line, Bump); false -> @@ -377,7 +409,7 @@ fix_clauses(Cs, Line, Bump) -> fix_cls([], _Line, _Bump) -> []; fix_cls([Cl | Cls], Line, Bump) -> - case bumps_line(Cl, Line) of + case bumps_line(Cl, Line, Bump) of true -> [fix_expr(C, Line, Bump) || C <- [Cl | Cls]]; false -> @@ -390,24 +422,30 @@ fix_cls([Cl | Cls], Line, Bump) -> [{clause,CA,P,G,Body1} | fix_cls(Cls, Line, Bump)] end. -bumps_line(E, L) -> - try bumps_line1(E, L) catch true -> true end. +bumps_line(E, L, Bump) -> + try + bumps_line1(E, L, Bump) + catch + throw:true -> + true + end. -bumps_line1({?EXECUTABLE_LINE,Line,_}, Line) -> +bumps_line1({BumpInstr,Line,_}, Line, {BumpInstr,_,_}) -> throw(true); -bumps_line1([E | Es], Line) -> - bumps_line1(E, Line), - bumps_line1(Es, Line); -bumps_line1(T, Line) when is_tuple(T) -> - bumps_line1(tuple_to_list(T), Line); -bumps_line1(_, _) -> +bumps_line1([E | Es], Line, Bump) -> + bumps_line1(E, Line, Bump), + bumps_line1(Es, Line, Bump); +bumps_line1(T, Line, Bump) when is_tuple(T) -> + bumps_line1(tuple_to_list(T), Line, Bump); +bumps_line1(_, _, _Bump) -> false. %% Insert an executable_line instruction in the abstract code. bump_call(Vars, Line) -> - #vars{module=M,function=F,arity=A,clause=C,index_fun=GetIndex} = Vars, + #vars{module=M,function=F,arity=A,clause=C,index_fun=GetIndex, + bump_instr=BumpInstr} = Vars, Index = GetIndex(M, F, A, C, Line), - {?EXECUTABLE_LINE,Line,Index}. + {BumpInstr,Line,Index}. %%% End of fix of last expression. @@ -464,11 +502,11 @@ munge_expr({'catch',Anno,Expr}, Vars0) -> munge_expr({call,Anno1,{remote,Anno2,ExprM,ExprF},Exprs}, Vars0) -> {MungedExprM, Vars1} = munge_expr(ExprM, Vars0), {MungedExprF, Vars2} = munge_expr(ExprF, Vars1), - {MungedExprs, Vars3} = munge_exprs(Exprs, Vars2), + {MungedExprs, Vars3} = munge_args(Exprs, Vars2), {{call,Anno1,{remote,Anno2,MungedExprM,MungedExprF},MungedExprs}, Vars3}; munge_expr({call,Anno,Expr,Exprs}, Vars0) -> {MungedExpr, Vars1} = munge_expr(Expr, Vars0), - {MungedExprs, Vars2} = munge_exprs(Exprs, Vars1), + {MungedExprs, Vars2} = munge_args(Exprs, Vars1), {{call,Anno,MungedExpr,MungedExprs}, Vars2}; munge_expr({lc,Anno,Expr,Qs}, Vars0) -> {MungedExpr, Vars1} = munge_expr(?BLOCK1(Expr), Vars0), @@ -532,6 +570,28 @@ munge_expr({bin_element,Anno,Value,Size,TypeSpecifierList}, Vars0) -> munge_expr(Form, Vars0) -> {Form, Vars0}. +munge_args(Args0, #vars{in_guard=false,bump_instr=debug_line}=Vars) -> + %% We want to have `debug_line` instructions inserted before each line in + %% this example: + %% + %% bar:f( + %% bar:g(X), + %% bar:h(X)). + Args = [case is_atomic(Arg) of + true -> Arg; + false -> ?BLOCK(Arg) + end || Arg <- Args0], + munge_exprs(Args, Vars); +munge_args(Args, Vars) -> + munge_exprs(Args, Vars). + +is_atomic({atom,_,_}) -> true; +is_atomic({float,_,_}) -> true; +is_atomic({integer,_,_}) -> true; +is_atomic({nil,_}) -> true; +is_atomic({var,_,_}) -> true; +is_atomic(_) -> false. + munge_exprs(Exprs, Vars) -> munge_exprs(Exprs, Vars, []). diff --git a/lib/compiler/src/v3_core.erl b/lib/compiler/src/v3_core.erl index 950e82d1a1d4..d4d19bcfb746 100644 --- a/lib/compiler/src/v3_core.erl +++ b/lib/compiler/src/v3_core.erl @@ -274,7 +274,8 @@ function({function,_,Name,Arity,Cs0}, Module, Opts) St0 = #core{vcount=0,function={Name,Arity},opts=Opts, dialyzer=member(dialyzer, Opts), ws=Ws0,file=[{file,File}]}, - {B0,St1} = body(Cs0, Name, Arity, St0), + {Cs1,Anno} = handle_debug_line(Cs0, St0), + {B0,St1} = body(Cs1, Name, Arity, St0), %% ok = function_dump(Name, Arity, "body:~n~p~n",[B0]), {B1,St2} = ubody(B0, St1), %% ok = function_dump(Name, Arity, "ubody:~n~p~n",[B1]), @@ -282,13 +283,23 @@ function({function,_,Name,Arity,Cs0}, Module, Opts) %% ok = function_dump(Name, Arity, "cbody:~n~p~n",[B2]), {B3,#core{ws=Ws,load_nif=LoadNif}} = lbody(B2, St3), %% ok = function_dump(Name, Arity, "lbody:~n~p~n",[B3]), - {{#c_var{name={Name,Arity}},B3},Ws,LoadNif} + {{#c_var{anno=Anno,name={Name,Arity}},B3},Ws,LoadNif} catch Class:Error:Stack -> io:fwrite("Function: ~w/~w\n", [Name,Arity]), erlang:raise(Class, Error, Stack) end. +handle_debug_line(Cs0, #core{opts=Opts}=St) -> + maybe + true ?= member(beam_debug_info, Opts), + [{clause,_,_,_,[{debug_line,Line,Index}|_]}|Cs] ?= Cs0, + {Cs,[{debug_line,{lineno_anno(Line, St),Index}}]} + else + _ -> + {Cs0,[]} + end. + body(Cs0, Name, Arity, St0) -> Anno = lineno_anno(element(2, hd(Cs0)), St0), FunAnno = [{function,{Name,Arity}} | Anno], @@ -986,9 +997,10 @@ expr({op,L,Op,L0,R0}, St0) -> {#icall{anno=#a{anno=LineAnno}, %Must have an #a{} module=#c_literal{anno=LineAnno,val=erlang}, name=#c_literal{anno=LineAnno,val=Op},args=As},Aps,St1}; -expr({executable_line,Loc,Index}, St0) -> +expr({Op,Loc,Index}, St0) when Op =:= executable_line; + Op =:= debug_line -> {#iprimop{anno=#a{anno=lineno_anno(Loc, St0)}, - name=#c_literal{val=executable_line}, + name=#c_literal{val=Op}, args=[#c_literal{val=Index}]},[],St0}; expr({ssa_check_when,L,WantedResult,Args,Tag,Clauses}, St) -> {#c_opaque{anno=full_anno(L, St),val={ssa_check_when,WantedResult,Tag,Args,Clauses}}, [], St}. @@ -1205,7 +1217,11 @@ try_after(Line, Es0, As0, St0) -> {V, St3} = new_var(St2), % (must not exist in As1) LineAnno = lineno_anno(Line, St3), - case is_iexprs_small(As, 20) of + %% If BEAM debug info has been requested, we must not duplicate + %% `debug_line` instructions. + BeamDebugInfo = member(beam_debug_info, St0#core.opts), + + case not BeamDebugInfo andalso is_iexprs_small(As, 20) of true -> try_after_small(LineAnno, Es, As, V, St3); false -> try_after_large(LineAnno, Es, As, V, St3) end. @@ -3104,8 +3120,16 @@ uexprs([#iexprs{bodies=Es0}|Les], Ks0, St0) -> uexprs([#imatch{anno=A,pat=P0,arg=Arg,fc=Fc}|Les], Ks, St0) -> case upat_is_new_var(P0, Ks) of true -> - %% Assignment to a new variable. - uexprs([#iset{var=P0,arg=Arg}|Les], Ks, St0); + case P0 of + #c_var{name='_'} -> + %% We need to rename '_' to a fresh name to + %% ensure that '_' does not end up in the debug + %% information. + {Var,St1} = new_var(St0), + uexprs([#iset{var=Var,arg=Arg}|Les], Ks, St1); + _ -> + uexprs([#iset{var=P0,arg=Arg}|Les], Ks, St0) + end; false when Les =:= [] -> %% Need to explicitly return match "value", make %% safe for efficiency. diff --git a/lib/compiler/test/Makefile b/lib/compiler/test/Makefile index dc3cbe128b1c..5e547dd46e57 100644 --- a/lib/compiler/test/Makefile +++ b/lib/compiler/test/Makefile @@ -11,6 +11,7 @@ MODULES= \ beam_block_SUITE \ beam_bounds_SUITE \ beam_validator_SUITE \ + beam_debug_info_SUITE \ beam_disasm_SUITE \ beam_doc_SUITE \ beam_except_SUITE \ @@ -167,6 +168,8 @@ ERL_FILES= $(MODULES:%=%.erl) CORE_FILES= $(CORE_MODULES:%=%.core) ERL_DUMMY_FILES= $(CORE_MODULES:%=%.erl) +BEAM_OPCODES_HRL=$(ERL_TOP)/lib/compiler/src/beam_opcodes.hrl + ##TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) ##INSTALL_PROGS= $(TARGET_FILES) @@ -309,6 +312,7 @@ release_tests_spec: make_emakefile done $(INSTALL_DATA) $(ERL_DUMMY_FILES) "$(RELSYSDIR)" rm $(ERL_DUMMY_FILES) + $(INSTALL_DATA) $(BEAM_OPCODES_HRL) "$(RELSYSDIR)" chmod -R u+w "$(RELSYSDIR)" @tar cf - *_SUITE_data property_test | (cd "$(RELSYSDIR)"; tar xf -) diff --git a/lib/compiler/test/beam_debug_info_SUITE.erl b/lib/compiler/test/beam_debug_info_SUITE.erl new file mode 100644 index 000000000000..c01455a81586 --- /dev/null +++ b/lib/compiler/test/beam_debug_info_SUITE.erl @@ -0,0 +1,706 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2024. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(beam_debug_info_SUITE). + +%% Make sure that we test running code compiled using the +%% `beam_debug_info` option. This will ensure that we test +%% `beam_disasm` on a module with debug information. +-compile([beam_debug_info]). + +-include("beam_opcodes.hrl"). + +-include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). + +-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, + init_per_group/2,end_per_group/2, + smoke/1, + fixed_bugs/1, + empty_module/1, + call_in_call_args/1, + missing_vars/1]). + +suite() -> [{ct_hooks,[ts_install_cth]}]. + +all() -> + [smoke, + {group,p}]. + +groups() -> + [{p,test_lib:parallel(), + [fixed_bugs, + empty_module, + call_in_call_args, + missing_vars]}]. + +init_per_suite(Config) -> + id(Config), + test_lib:recompile(?MODULE), + Config. + +end_per_suite(_Config) -> + ok. + +init_per_group(_GroupName, Config) -> + Config. + +end_per_group(_GroupName, Config) -> + Config. + +smoke(_Config) -> + TestBeams0 = get_unique_beam_files(), + TestBeams = compiler_beams() ++ TestBeams0, + + S = ~""" + Below, for each module, there is a list of functions with + variables missing in the BEAM debug info. Note that there + will probably never be possible to have all variables + present in the debug info, because some variables die before + a `debug_line` instruction is reached. + + ** means that at least one of the missing variables is + significant (does not start with an underscore). + + """, + io:put_chars(S), + + test_lib:p_run(fun do_smoke/1, TestBeams). + +compiler_beams() -> + filelib:wildcard(filename:join([code:lib_dir(compiler), "ebin", "*.beam"])). + +do_smoke(Beam) -> + try + {ok,{Mod,[{abstract_code,{raw_abstract_v1,Abstr0}}]}} = + beam_lib:chunks(Beam, [abstract_code]), + + %% beam_validator will check for each `debug_line` instruction + %% that the frame size is correct and that all referenced BEAM + %% registers are valid. + {ok,Mod,Code} = compile:forms(Abstr0, + [beam_debug_info,binary,report_errors]), + {ok,_,Abstr} = compile:forms(Abstr0, + [beam_debug_info,dexp,binary,report_errors]), + SrcVars = source_variables(Abstr), + IndexToFunctionMap = abstr_debug_lines(Abstr), + DebugInfo = get_debug_info(Mod, Code), + + {DbgVars,DbgLiterals} = debug_info_vars(DebugInfo, IndexToFunctionMap), + + %% The debug information must only contain variables that are + %% present in the source code. If the sanity check below + %% fails, it could be for one of the following reasons: + %% + %% * A compiler pass has introduced a new temporary variable + %% whose name is a legal Erlang variable name. (Such + %% temporary variables are supposed to have invalid names, + %% such as `rec1`.) + %% + %% * Something is wrong in the mapping from `debug_line` + %% instruction to function names, causing a variable to be + %% collected into the wrong function. (See + %% abstr_debug_lines/1.) + %% + %% * A heuristic in source_variables/1 is wrong, causing variables + %% that actually are present in the debug information to be + %% removed from the list of variables from the source code. + + [] = family_difference(DbgVars, SrcVars), + + %% Now figure out which variables are missing from the debug info + %% and print them. + AllDbg = family_union(DbgVars, DbgLiterals), + Diff0 = family_difference(SrcVars, AllDbg), + Diff = [begin + {Vars,B} = format_vars(Vars0), + S = io_lib:format("~p/~p: ~ts", [F,A,Vars]), + [" ",case B of + true -> " " ++ S; + false -> "** " ++ S + end,"\n"] + end || {{F,A},Vars0} <:- Diff0], + io:format("~p:\n~ts", [Mod,Diff]) + catch + throw:{error,Error} -> + io:format("*** compilation failure '~p' for file ~s\n", + [Error,Beam]), + error; + Class:Error:Stk -> + io:format("~p: ~p ~p\n~p\n", [Beam,Class,Error,Stk]), + error + end. + +format_vars(Vs) -> + Str = lists:join(", ", [io_lib:format("~ts", [V]) || V <- Vs]), + B = lists:all(fun(V) -> + case atom_to_binary(V) of + <<"_",_/binary>> -> true; + _ -> false + end + end, Vs), + {Str,B}. + +debug_info_vars(DebugInfo, IndexToFunctionMap) -> + {Vars0,Literals0} = debug_info_vars_1(DebugInfo, IndexToFunctionMap, [], []), + Vars = family_union(Vars0), + Literals = family_union(Literals0), + {Vars,Literals}. + +debug_info_vars_1([{I,{_FrameSize,List}}|T], IndexToFunctionMap, VarAcc, LitAcc) -> + case debug_info_vars_2(List, [], []) of + {[],[]} -> + debug_info_vars_1(T, IndexToFunctionMap, VarAcc, LitAcc); + {Vars,Literals} -> + F = map_get(I, IndexToFunctionMap), + debug_info_vars_1(T, IndexToFunctionMap, + [{F,Vars}|VarAcc], [{F,Literals}|LitAcc]) + end; +debug_info_vars_1([], _, VarAcc, LitAcc) -> + {VarAcc,LitAcc}. + +debug_info_vars_2([{Name,_Value}|T], VarAcc, LitAcc) when is_integer(Name) -> + debug_info_vars_2(T, VarAcc, LitAcc); +debug_info_vars_2([{Name0,Value}|T], VarAcc, LitAcc) when is_binary(Name0) -> + Name = binary_to_atom(Name0), + case Value of + {x,_} -> debug_info_vars_2(T, [Name|VarAcc], LitAcc); + {y,_} -> debug_info_vars_2(T, [Name|VarAcc], LitAcc); + {value,_} -> debug_info_vars_2(T, VarAcc, [Name|LitAcc]) + end; +debug_info_vars_2([], VarAcc, LitAcc) -> + {VarAcc,LitAcc}. + +family_union(S0) -> + S1 = sofs:relation(S0, [{function,[variable]}]), + S2 = sofs:relation_to_family(S1), + S3 = sofs:family_union(S2), + sofs:to_external(S3). + +family_union(F0, F1) -> + S0 = sofs:relation(F0, [{function,[variable]}]), + S1 = sofs:relation(F1, [{function,[variable]}]), + S2 = sofs:family_union(S0, S1), + sofs:to_external(S2). + +family_difference(F0, F1) -> + S0 = sofs:family(F0, [{function,[variable]}]), + S1 = sofs:family(F1, [{function,[variable]}]), + S2 = sofs:family_difference(S0, S1), + SpecFun = fun(S) -> sofs:no_elements(S) =/= 0 end, + S3 = sofs:family_specification(SpecFun, S2), + sofs:to_external(S3). + +%% +%% Extract variables mentioned in the source code. Try to remove +%% variables that will never show up in the debug information; for +%% examples, definitions of variables that are not followed by any +%% `debug_line` instructions can be ignored. +%% +source_variables(Abstr) -> + [{{Name,Arity},extract_src_vars(F)} || + {function,_,Name,Arity,_}=F <- Abstr]. + +extract_src_vars(F) -> + L1 = extract_src_vars(F, true, #{}), + L2 = [V || V := true <- L1], + lists:sort(L2). + +extract_src_vars({var,_,'_'}, _Lc, Acc) -> + Acc; +extract_src_vars({var,_,Name}, _Lc, Acc0) -> + case atom_to_binary(Name) of + <<"cov",_/binary>> -> + %% Ignore variable added by the sys_coverage pass. + Acc0; + <<"rec",_/binary>> -> + %% Ignore variable added by the erl_expand_pass. + Acc0; + _ -> + true = beam_ssa_codegen:is_original_variable(Name), + Acc0#{Name => true} + end; +extract_src_vars({atom,_,_}, _Lc, Acc) -> Acc; +extract_src_vars({bin,_,Es}, _Lc, Acc) -> + extract_args(Es, Acc); +extract_src_vars({bin_element,_,Val,Size,_}, _Lc, Acc0) -> + Acc1 = extract_src_vars(Val, false, Acc0), + case Size of + default -> Acc1; + _ -> extract_src_vars(Size, false, Acc1) + end; +extract_src_vars({char,_,_}, _Lc, Acc) -> Acc; +extract_src_vars({float,_,_}, _Lc, Acc) -> Acc; +extract_src_vars({integer,_,_}, _Lc, Acc) -> Acc; +extract_src_vars({nil,_}, _Lc, Acc) -> Acc; +extract_src_vars({string,_,_}, _Lc, Acc) -> Acc; +extract_src_vars({cons,_,Hd,Tl}, Lc, Acc0) -> + Acc1 = extract_src_vars(Hd, Lc, Acc0), + extract_src_vars(Tl, Lc, Acc1); +extract_src_vars({map,_,Fs}, _Lc, Acc0) -> + extract_args(Fs, Acc0); +extract_src_vars({map,_,M,Fs}, Lc, Acc0) -> + Acc1 = extract_src_vars(M, Lc, Acc0), + extract_args(Fs, Acc1); +extract_src_vars({map_field_assoc,_,K,V}, _Lc, Acc0) -> + Acc1 = extract_src_vars(K, false, Acc0), + extract_src_vars(V, false, Acc1); +extract_src_vars({map_field_exact,_,K,V}, _Lc, Acc0) -> + Acc1 = extract_src_vars(K, false, Acc0), + extract_src_vars(V, false, Acc1); +extract_src_vars({tuple,_,Es}, _Lc, Acc) -> + extract_args(Es, Acc); +extract_src_vars({call,_,F,As}, Lc, Acc0) -> + Acc1 = extract_src_vars(F, Lc, Acc0), + extract_args(As, Acc1); +extract_src_vars({remote,_,Mod,Name}, Lc, Acc0) -> + Acc1 = extract_src_vars(Mod, Lc, Acc0), + extract_src_vars(Name, Lc, Acc1); +extract_src_vars({match,_,P,E}, Lc, Acc0) -> + Acc1 = extract_src_vars(P, false, Acc0), + extract_src_vars(E, Lc, Acc1); +extract_src_vars({op,_,_Name,Arg}, Lc, Acc0) -> + extract_src_vars(Arg, Lc, Acc0); +extract_src_vars({op,_,_Name,Lhs,Rhs}, Lc, Acc0) -> + Acc1 = extract_src_vars(Lhs, false, Acc0), + extract_src_vars(Rhs, Lc, Acc1); +extract_src_vars({debug_line,_,_}, _Lc, Acc) -> + Acc; +extract_src_vars({executable_line,_,_}, _Lc, Acc) -> + Acc; +extract_src_vars({named_fun,_,Name,Cs}, Lc, Acc0) -> + case any_debug_line_instrs(Cs) of + false -> + %% Since there are no `debug_line` instructions within this fun, + %% none of the variables defined in the fun should ever + %% show up in the debug info. + Acc0; + true when Name =/= '_' -> + Acc = case Name of + '_' -> Acc0; + _ -> extract_src_vars({var,anno,Name}, Lc, Acc0) + end, + extract_cs(Cs, true, Acc) + end; +extract_src_vars({function,_Anno,_,_,Cs}, _Lc, Acc0) -> + case any_debug_line_instrs(Cs) of + true -> + extract_cs(Cs, true, Acc0); + false -> + %% There are no `debug_line` instructions in this + %% function. This happens if code has been placed in a + %% header filer, or if the `-file()` attribute has been + %% used to change the name of the source file. + Acc0 + end; +extract_src_vars({'fun',_Anno,{clauses,Cs}}, _Lc, Acc0) -> + case any_debug_line_instrs(Cs) of + true -> + extract_cs(Cs, true, Acc0); + false -> + Acc0 + end; +extract_src_vars({'fun',_Anno,_}, _Lc, Acc0) -> Acc0; +extract_src_vars({block,_Anno,Es}, Lc, Acc0) -> + extract_body(Es, Lc, Acc0); +extract_src_vars({'receive',_Anno,Cs}, Lc, Acc0) -> + extract_cs(Cs, Lc, Acc0); +extract_src_vars({'receive',_Anno,Cs,_To,ToE}, Lc, Acc0) -> + Acc1 = extract_cs(Cs, Lc, Acc0), + extract_body(ToE, Lc, Acc1); +extract_src_vars({'maybe',_Anno,Body}, Lc, Acc0) -> + extract_body(Body, Lc, Acc0); +extract_src_vars({'maybe',_Anno,Body,{'else',_,ElseClauses}}, Lc, Acc0) -> + Acc1 = extract_body(Body, Lc, Acc0), + extract_cs(ElseClauses, Lc, Acc1); +extract_src_vars({'maybe_match',_Anno,P,E}, Lc, Acc0) -> + Acc1 = extract_src_vars(P, false, Acc0), + extract_src_vars(E, Lc, Acc1); +extract_src_vars({'case',_Anno,E,Cs}, Lc, Acc0) -> + Acc1 = extract_src_vars(E, false, Acc0), + extract_cs(Cs, Lc, Acc1); +extract_src_vars({'if',_Anno,Cs}, Lc, Acc0) -> + extract_cs(Cs, Lc, Acc0); +extract_src_vars({'try',_Anno,Es,Scs,Ccs,As}, Lc, Acc0) -> + Acc1 = extract_body(Es, false, Acc0), + Acc2 = extract_cs(Scs, Lc, Acc1), + Acc3 = extract_cs(Ccs, Lc, Acc2), + extract_body(As, Lc, Acc3); +extract_src_vars({'catch',_Anno,E}, Lc, Acc0) -> + extract_src_vars(E, Lc, Acc0); +extract_src_vars({zip,_,Qs0}, _Lc, Acc0) -> + Qs = extract_sv_qs(Qs0), + extract_args(Qs, Acc0); +extract_src_vars({C,_,Build,Qs0}, Lc, Acc0) + when C =:= lc; C =:= bc; C =:= mc -> + case any_debug_line_instrs(Build) of + false -> + Qs = extract_sv_qs(Qs0), + case any_debug_line_instrs(Qs) of + false -> + Acc0; + true -> + extract_args(Qs, Acc0) + end; + true -> + Acc1 = extract_src_vars(Build, Lc, Acc0), + extract_args(Qs0, Acc1) + end; +extract_src_vars({G,_,P,E}, _Lc, Acc0) -> + true = is_generator(G), %Assertion. + Acc1 = extract_src_vars(P, false, Acc0), + extract_src_vars(E, false, Acc1). + +is_generator(generate) -> true; +is_generator(b_generate) -> true; +is_generator(m_generate) -> true; +is_generator(generate_strict) -> true; +is_generator(b_generate_strict) -> true; +is_generator(m_generate_strict) -> true; +is_generator(_) -> false. + +extract_cs([{clause,_,Pats,Gs,Body}|Cs], Lc, Acc0) -> + case Lc andalso not any_debug_line_instrs(Body) of + true -> + extract_cs(Cs, Lc, Acc0); + false -> + Acc1 = extract_args(Pats, Acc0), + Acc2 = extract_guards(Gs, Acc1), + Acc3 = extract_body(Body, Lc, Acc2), + extract_cs(Cs, Lc, Acc3) + end; +extract_cs([], _, Acc) -> + Acc. + +extract_body([I], Lc, Acc) -> + case Lc andalso not any_debug_line_instrs(I) of + true -> + Acc; + false -> + extract_src_vars(I, Lc, Acc) + end; +extract_body([I|Is], Lc, Acc0) -> + Acc = extract_src_vars(I, false, Acc0), + extract_body(Is, Lc, Acc); +extract_body([], _Lc, Acc) -> Acc. + +extract_args([A|As], Acc) -> + extract_args(As, extract_src_vars(A, false, Acc)); +extract_args([], Acc) -> Acc. + +extract_guards([A|As], Acc) -> + extract_guards(As, extract_args(A, Acc)); +extract_guards([], Acc) -> Acc. + +extract_sv_qs([{block,BlkL,[{executable_line,_,_}|Bs]}|Qs1]) -> + %% Note: `debug_line` instructions are `executable_line` + %% instructions in the abstract code. + [{block,BlkL,Bs}|extract_sv_qs_1(Qs1)]; +extract_sv_qs(Qs) -> Qs. + +extract_sv_qs_1([Q|Qs]) -> + case abstr_extract_debug_lines(Qs, []) of + [] -> + [Q]; + [_|_] -> + [Q|extract_sv_qs_1(Qs)] + end; +extract_sv_qs_1([]) -> []. + +any_debug_line_instrs(Abstr) -> + abstr_extract_debug_lines(Abstr, []) =/= []. + +%% +%% Return a mapping from `debug_line` instruction index to function. +%% +abstr_debug_lines(Abstr) -> + S0 = [{{Name,Arity},abstr_extract_debug_lines(Body)} || + {function,_,Name,Arity,Body} <- Abstr], + S1 = sofs:family(S0, [{function,[line]}]), + S2 = sofs:family_to_relation(S1), + S3 = sofs:converse(S2), + S4 = sofs:to_external(S3), + maps:from_list(S4). + +abstr_extract_debug_lines(Abstr) -> + abstr_extract_debug_lines(Abstr, []). + +abstr_extract_debug_lines({debug_line,_,Index}, Acc) -> + [Index|Acc]; +abstr_extract_debug_lines([H|T], Acc0) -> + Acc1 = abstr_extract_debug_lines(H, Acc0), + abstr_extract_debug_lines(T, Acc1); +abstr_extract_debug_lines(Tuple, Acc0) when is_tuple(Tuple) -> + abstr_extract_debug_lines(tuple_to_list(Tuple), Acc0); +abstr_extract_debug_lines(_, Acc) -> Acc. + +%%% +%%% Read and disassemble the BEAM debug information from the "DbgB" +%%% chunk of a BEAM file. +%%% +get_debug_info(Mod, Beam) -> + {ok,{Mod,[{"DbgB",DebugInfo0}, + {atoms,Atoms0}]}} = beam_lib:chunks(Beam, ["DbgB",atoms]), + Atoms = maps:from_list(Atoms0), + Literals = case beam_lib:chunks(Beam, ["LitT"]) of + {ok,{Mod,[{"LitT",Literals0}]}} -> + decode_literal_table(Literals0); + {error,_,_} -> + [] + end, + Op = beam_opcodes:opcode(call, 2), + <> = DebugInfo0, + 0 = Version, + DebugInfo = decode_debug_info(DebugInfo1, Literals, Atoms, Op), + lists:zip(lists:seq(1, length(DebugInfo)), DebugInfo). + +decode_literal_table(<<0:32,N:32,Tab/binary>>) -> + #{Index => binary_to_term(Literal) || + Index <- lists:seq(0, N - 1) && + <> <:= Tab}. + +decode_debug_info(Code0, Literals, Atoms, Op) -> + case Code0 of + <> -> + {FrameSize0,Code2} = decode_arg(Code1, Literals, Atoms), + FrameSize = case FrameSize0 of + nil -> none; + {atom,entry} -> entry; + _ -> FrameSize0 + end, + {{list,List0},Code3} = decode_arg(Code2, Literals, Atoms), + List = decode_list(List0), + [{FrameSize,List}|decode_debug_info(Code3, Literals, Atoms, Op)]; + <<>> -> + [] + end. + +decode_list([{integer,Var}|T]) when is_integer(Var) -> + decode_list([{literal,Var}|T]); +decode_list([{literal,Var},Where0|T]) -> + Where = case Where0 of + {literal,Lit} -> {value,Lit}; + {atom,A} -> {value,A}; + {integer,I} -> {value,I}; + nil -> {value,[]}; + {x,_} -> Where0; + {y,_} -> Where0 + end, + [{Var,Where}|decode_list(T)]; +decode_list([]) -> []. + +decode_args(0, Code, _Literals, _Atoms) -> + {[],Code}; +decode_args(N, Code0, Literals, Atoms) when is_integer(N), N > 0 -> + {Arg,Code1} = decode_arg(Code0, Literals, Atoms), + {Args,Code2} = decode_args(N - 1, Code1, Literals, Atoms), + {[Arg|Args],Code2}. + +decode_arg(Code0, Literals, Atoms) -> + case decode_raw_arg(Code0) of + {nil,_}=Res -> Res; + {{u,N},Code1} -> + {N,Code1}; + {{atom,Index},Code1} -> + {{atom,map_get(Index, Atoms)},Code1}; + {{integer,_},_}=Res -> Res; + {{x,_},_}=Res -> Res; + {{y,_},_}=Res -> Res; + {{z,1},Code1} -> + {{u,N},Code2} = decode_raw_arg(Code1), + {List,Code3} = decode_args(N, Code2, Literals, Atoms), + {{list,List},Code3}; + {{z,4},Code1} -> + {{u,N},Code2} = decode_raw_arg(Code1), + {{literal,map_get(N, Literals)},Code2} + end. + +decode_raw_arg(<<0:4,0:1,?tag_a:3,Code/binary>>) -> + {nil,Code}; +decode_raw_arg(<>) -> + {{decode_tag(Tag),N},Code}; +decode_raw_arg(<<2#111:3,1:1,1:1,Tag:3,Code0/binary>>) -> + {{u,W0},Code1} = decode_raw_arg(Code0), + W = W0 + 9, + <> = Code1, + {{decode_tag(Tag),N},Code2}; +decode_raw_arg(<>) -> + W = W0 + 2, + <> = Code0, + {{decode_tag(Tag),N},Code1}; +decode_raw_arg(<>) -> + N = (High bsl 8) bor Low, + {{decode_tag(Tag),N},Code0}. + +decode_tag(?tag_u) -> u; +decode_tag(?tag_i) -> integer; +decode_tag(?tag_a) -> atom; +decode_tag(?tag_x) -> x; +decode_tag(?tag_y) -> y; +decode_tag(?tag_z) -> z. + +%%% +%%% Other test cases. +%%% + +fixed_bugs(_Config) -> + ok = unassigned_yreg(ok), + {'EXIT',_} = catch unassigned_yreg(not_ok), + + ~"xyz" = wrong_frame_size(id(~"xyz")), + boom = catch wrong_frame_size(id(42)), + + {ok,error} = no_function(ok), + + ok. + +unassigned_yreg(V) -> + case id(V) of + _ -> + case V of ok -> ok end, + case catch id(whatever) of + Y -> + case id(true) of + true -> + id(Y), + ok; + false -> + ok + end + end + end. + +wrong_frame_size(X) -> + id(X), + case id(X) of + Y when is_binary(Y) -> Y; + _Err -> throw(boom) + end. + +no_function(X) -> + case catch id(X) of + ok -> + case catch id(error) of + Err -> + id(0), + id({X, Err}) + end; + Err -> + id(0), + id({X, Err}) + end. + + +empty_module(_Config) -> + Mod = list_to_atom(?MODULE_STRING ++ "_" ++ + atom_to_list(?FUNCTION_NAME)), + Empty = [{attribute,{1,1},file,{atom_to_list(Mod),1}}, + {attribute,{1,2},module,Mod}, + {eof,{3,1}}], + {ok,Mod,_Code} = compile:forms(Empty, [beam_debug_info,report]), + + ok. + +call_in_call_args(Config) -> + M = ?FUNCTION_NAME, + PrivDir = proplists:get_value(priv_dir, Config), + SrcName = filename:join(PrivDir, atom_to_list(M) ++ ".erl"), + + S = ~""" + -module(call_in_call_args). + -export([f/1]). + + f(X) -> + bar:g( + bar:h(X), + id(X) + ). + id(I) -> I. + """, + + ok = file:write_file(SrcName, S), + {ok,M,Asm} = compile:file(SrcName, [report,beam_debug_info,binary,to_asm]), + {M,_,_,[{function,f,1,_,Is}|_],_} = Asm, + DebugLines = [I || I <- Is, element(1, I) =:= debug_line], + io:format("~p\n", [DebugLines]), + 4 = length(DebugLines), + + ok. + +missing_vars(Config) -> + M = ?FUNCTION_NAME, + PrivDir = proplists:get_value(priv_dir, Config), + SrcName = filename:join(PrivDir, atom_to_list(M) ++ ".erl"), + + S = ~""" + -module(missing_vars). %%L01 + -export([f/3]). %%L02 + f(X, Y, Z0) -> %%L03 + case X of %%L04 + false -> %%L05 + Z1 = Z0#{k := Y}, %%L06 + foo:go(X), %%L07 + Z1; %%L08 + _ -> %%L09 + Z1 = Z0#{k := X}, %%L10 + Z1 %%L11 + end. %%L12 + """, + + ok = file:write_file(SrcName, S), + {ok,M,Asm} = compile:file(SrcName, [report,beam_debug_info,binary,to_asm]), + {M,_,_,[{function,f,3,_,Is}|_],_} = Asm, + DebugLines0 = [begin + {location,_File,Line} = lists:keyfind(location, 1, Anno), + {Line,FrameSz,[Name || {Name,_} <- Vars]} + end || {debug_line,Anno,_,_,{FrameSz,Vars}} <- Is], + DebugLines = lists:sort(DebugLines0), + io:format("~p\n", [DebugLines]), + Expected = [{3,entry,[{integer,1},{integer,2},{integer,3}]}, + {4,none,['X','Y','Z0']}, + {6,none,['X','Y','Z0']}, + {7,none,['X','Z0','Z1']}, + {8,1,['Z1']}, + {10,none,['X','Y','Z0']}, + {11,none,['Y','Z0','Z1']}], + + ?assertEqual(Expected, DebugLines), + + ok. + + +%%% +%%% Common utility functions. +%%% + +get_unique_beam_files() -> + F = fun IsCloned(ModString) -> + case ModString of + "_dialyzer_SUITE" -> true; + "_r25_SUITE" -> true; + [_|T] -> IsCloned(T); + _ -> false + end + end, + test_lib:get_unique_files(".beam", F). + +id(I) -> I. diff --git a/lib/compiler/test/compile_SUITE.erl b/lib/compiler/test/compile_SUITE.erl index 0c2fcb358d1e..65cdacd3c850 100644 --- a/lib/compiler/test/compile_SUITE.erl +++ b/lib/compiler/test/compile_SUITE.erl @@ -1476,7 +1476,7 @@ beam_ssa_pp_1(Mod, Abstr, Outdir) -> %% Test that warnings contain filenames and line numbers. warnings(_Config) -> - Files = get_unique_files(".erl"), + Files = test_lib:get_unique_files(".erl"), test_lib:p_run(fun do_warnings/1, Files). do_warnings(F) -> @@ -1749,7 +1749,10 @@ bc_options(Config) -> {182, small, [r26]}, {182, small, []}, - {183, small, [line_coverage]} + {183, small, [line_coverage]}, + + {184, small, [beam_debug_info]}, + {184, big, [beam_debug_info]} ], Test = fun({Expected,Mod,Options}) -> @@ -2371,22 +2374,7 @@ compile_and_verify(Name, Target, Opts) -> Opts = BeamOpts. get_unique_beam_files() -> - get_unique_files(".beam"). - -get_unique_files(Ext) -> - Wc = filename:join(filename:dirname(code:which(?MODULE)), "*"++Ext), - [F || F <- filelib:wildcard(Wc), - not is_cloned(F, Ext), not is_lfe_module(F, Ext)]. - -is_cloned(File, Ext) -> - Mod = list_to_atom(filename:basename(File, Ext)), - test_lib:is_cloned_mod(Mod). - -is_lfe_module(File, Ext) -> - case filename:basename(File, Ext) of - "lfe_" ++ _ -> true; - _ -> false - end. + test_lib:get_unique_files(".beam"). %% Compiles a test module and returns the list of errors and warnings. diff --git a/lib/compiler/test/compile_SUITE_data/small.erl b/lib/compiler/test/compile_SUITE_data/small.erl index 37cd270e5047..1efb66cb2ebb 100644 --- a/lib/compiler/test/compile_SUITE_data/small.erl +++ b/lib/compiler/test/compile_SUITE_data/small.erl @@ -1,6 +1,6 @@ -module(small). --export([go/0,go/2]). +-export([go/0,go/2,latin1_var/1]). -small_attribute({value,3}). @@ -43,6 +43,8 @@ recv() -> tmo = F(), ok. +latin1_var(Överskott) -> + Överskott + 1. id(I) -> I. diff --git a/lib/compiler/test/test_lib.erl b/lib/compiler/test/test_lib.erl index 3b1cff569212..312d160f711e 100644 --- a/lib/compiler/test/test_lib.erl +++ b/lib/compiler/test/test_lib.erl @@ -23,8 +23,10 @@ -compile({no_auto_import,[binary_part/2]}). -export([id/1,recompile/1,recompile_core/1,parallel/0, uniq/0,opt_opts/1,get_data_dir/1, - is_cloned_mod/1,smoke_disasm/1,p_run/2,p_run/3, - highest_opcode/1]). + smoke_disasm/1, + p_run/2,p_run/3, + highest_opcode/1, + get_unique_files/1,get_unique_files/2]). %% Used by test case that override BIFs. -export([binary_part/2,binary/1]). @@ -85,6 +87,7 @@ opt_opts(Mod) -> %% `options` may not be set at all if +deterministic is enabled. Opts = proplists:get_value(options, Comp, []), lists:filter(fun + (beam_debug_info) -> true; (debug_info) -> true; (dialyzer) -> true; ({feature,_,enable}) -> true; @@ -92,6 +95,7 @@ opt_opts(Mod) -> (inline) -> true; (line_coverage) -> true; (no_badrecord) -> true; + (no_bool_opt) -> true; (no_bs_create_bin) -> true; (no_bsm_opt) -> true; (no_bs_match) -> true; @@ -118,6 +122,7 @@ get_data_dir(Config) -> Data = proplists:get_value(data_dir, Config), Opts = [{return,list}], Suffixes = ["_no_opt_SUITE", + "_no_bool_opt_SUITE", "_no_copt_SUITE", "_no_copt_ssa_SUITE", "_post_opt_SUITE", @@ -131,24 +136,22 @@ get_data_dir(Config) -> re:replace(Acc, Suffix, "_SUITE", Opts) end, Data, Suffixes). -is_cloned_mod(Mod) -> - is_cloned_mod_1(atom_to_list(Mod)). - -%% Test whether Mod is a cloned module. We don't consider modules +%% Test whether the module is cloned. We don't consider modules %% compiled with compatibility for an older release cloned (that %% will improve coverage). -is_cloned_mod_1("_no_opt_SUITE") -> true; -is_cloned_mod_1("_no_copt_SUITE") -> true; -is_cloned_mod_1("_no_copt_ssa_SUITE") -> true; -is_cloned_mod_1("_no_ssa_opt_SUITE") -> true; -is_cloned_mod_1("_no_type_opt_SUITE") -> true; -is_cloned_mod_1("_post_opt_SUITE") -> true; -is_cloned_mod_1("_inline_SUITE") -> true; -is_cloned_mod_1("_no_module_opt_SUITE") -> true; -is_cloned_mod_1("_cover_SUITE") -> true; -is_cloned_mod_1([_|T]) -> is_cloned_mod_1(T); -is_cloned_mod_1([]) -> false. +is_cloned("_no_opt_SUITE") -> true; +is_cloned("_no_bool_opt_SUITE") -> true; +is_cloned("_no_copt_SUITE") -> true; +is_cloned("_no_copt_ssa_SUITE") -> true; +is_cloned("_no_ssa_opt_SUITE") -> true; +is_cloned("_no_type_opt_SUITE") -> true; +is_cloned("_post_opt_SUITE") -> true; +is_cloned("_inline_SUITE") -> true; +is_cloned("_no_module_opt_SUITE") -> true; +is_cloned("_cover_SUITE") -> true; +is_cloned([_|T]) -> is_cloned(T); +is_cloned([]) -> false. %% Return the highest opcode use in the BEAM module. @@ -158,6 +161,25 @@ highest_opcode(Beam) -> <<16:32,FormatNumber:32,HighestOpcode:32,_/binary>> = Code, HighestOpcode. +%% Get all unique files in the test case directory. +get_unique_files(Ext) -> + get_unique_files(Ext, fun(_ModString) -> false end). + +get_unique_files(Ext, IsCloned) when is_function(IsCloned, 1) -> + Wc = filename:join(filename:dirname(code:which(?MODULE)), "*"++Ext), + [F || F <- filelib:wildcard(Wc), + not is_cloned(F, Ext, IsCloned), not is_lfe_module(F, Ext)]. + +is_cloned(File, Ext, IsCloned) -> + ModString = filename:basename(File, Ext), + is_cloned(ModString) orelse IsCloned(ModString). + +is_lfe_module(File, Ext) -> + case filename:basename(File, Ext) of + "lfe_" ++ _ -> true; + _ -> false + end. + %% p_run(fun(Data) -> ok|error, List) -> ok %% Will fail the test case if there were any errors. diff --git a/lib/dialyzer/src/dialyzer_dataflow.erl b/lib/dialyzer/src/dialyzer_dataflow.erl index 05928a450b3e..81ea19135206 100644 --- a/lib/dialyzer/src/dialyzer_dataflow.erl +++ b/lib/dialyzer/src/dialyzer_dataflow.erl @@ -947,6 +947,8 @@ handle_primop(Tree, Map, State) -> {State, Map, t_any()}; nif_start -> {State, Map, t_any()}; + debug_line -> + {State, Map, t_any()}; executable_line -> {State, Map, t_any()}; Other -> diff --git a/lib/dialyzer/src/dialyzer_typesig.erl b/lib/dialyzer/src/dialyzer_typesig.erl index 482a991bad41..e3f5d82aca44 100644 --- a/lib/dialyzer/src/dialyzer_typesig.erl +++ b/lib/dialyzer/src/dialyzer_typesig.erl @@ -438,6 +438,8 @@ traverse(Tree, DefinedVars, State) -> {State, t_any()}; nif_start -> {State, t_any()}; + debug_line -> + {State, t_any()}; executable_line -> {State, t_any()}; Other -> erlang:error({'Unsupported primop', Other}) diff --git a/lib/stdlib/src/erl_expand_records.erl b/lib/stdlib/src/erl_expand_records.erl index f5e79370ccb8..d8ff6c934ed2 100644 --- a/lib/stdlib/src/erl_expand_records.erl +++ b/lib/stdlib/src/erl_expand_records.erl @@ -498,6 +498,8 @@ expr({op,Anno,Op,L0,R0}, St0) -> {{op,Anno,Op,L,R},St2}; expr({executable_line,_,_}=E, St) -> {E, St}; +expr({debug_line,_,_}=E, St) -> + {E, St}; expr({ssa_check_when,_,_,_,_,_}=E, St) -> {E, St}. diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl index 34eb7cc36f20..06e08e3c9dc4 100644 --- a/lib/stdlib/src/erl_pp.erl +++ b/lib/stdlib/src/erl_pp.erl @@ -845,6 +845,8 @@ lexpr({remote,_,M,F}, Prec, Opts) -> %% BIT SYNTAX: lexpr({bin,_,Fs}, _, Opts) -> bit_grp(Fs, Opts); +lexpr({debug_line,Line,Index}, _Prec, _Opts) -> + leaf(format("beam_instruction:debug_line(~p, ~p)", [Line,Index])); lexpr({executable_line,Line,Index}, _Prec, _Opts) -> leaf(format("beam_instruction:executable_line(~p, ~p)", [Line,Index])); %% Special case for straight values. From fcbaf38705b9fd492e4d1a9ff58a2fca1dfa3f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 3 Sep 2024 05:52:05 +0200 Subject: [PATCH 5/5] Implement support for debug information in the runtime system This commit implements support for loading the debug information from a BEAM file and the `code:get_debug_info/1` BIF for retrieving the debug information. As an example, given the following module: -module(example). % 1 -export([foo/1]). % 2 foo(A) -> % 4 case A of % 5 0 -> % 6 B = 1, % 7 io:format("~p\n", [B]); % 8 1 -> % 9 C = [1,2,3], % 10 io:format("~p\n", [C]) % 11 end, % 12 A. % 13 sign(N) when N < 0 -> 1; % 15 sign(N) when N == 0 -> 0; % 16 sign(_) -> 1. % 17 here is how to compile it with BEAM debug information and display the debug information: 1> c(example, beam_debug_info). {ok,example} 2> code:get_debug_info(example). [{4,{entry,[{1,{x,0}}]}}, {5,{1,[{<<"A">>,{y,0}}]}}, {7,{1,[{<<"A">>,{y,0}}]}}, {8,{1,[{<<"B">>,{value,1}},{<<"A">>,{y,0}}]}}, {10,{1,[{<<"A">>,{y,0}}]}}, {11,{1,[{<<"C">>,{value,[1,2,3]}},{<<"A">>,{y,0}}]}}, {13,{1,[{<<"A">>,{y,0}}]}}, {15,{entry,[{1,{x,0}}]}}, {15,{none,[{<<"N">>,{x,0}}]}}, {16,{none,[{<<"N">>,{x,0}}]}}, {17,{none,[]}}] List elements having the frame size `entry` refers to a `debug_line` instruction at the beginning of the function (before all clauses). Note that the line number for such entries *may* be the same as the line number for the first clause. All other line numbers are guaranteed to be unique. --- erts/emulator/beam/atom.names | 9 +- erts/emulator/beam/beam_bif_load.c | 132 ++++++++++++ erts/emulator/beam/beam_code.h | 21 ++ erts/emulator/beam/beam_file.c | 223 ++++++++++++++++++++ erts/emulator/beam/beam_file.h | 22 +- erts/emulator/beam/beam_load.c | 2 + erts/emulator/beam/bif.tab | 4 +- erts/emulator/beam/jit/arm/instr_common.cpp | 8 +- erts/emulator/beam/jit/arm/ops.tab | 2 +- erts/emulator/beam/jit/asm_load.c | 81 +++++++ erts/emulator/beam/jit/x86/instr_common.cpp | 6 + erts/emulator/beam/jit/x86/ops.tab | 2 +- lib/compiler/test/beam_debug_info_SUITE.erl | 107 +++++++++- lib/kernel/src/code.erl | 22 +- lib/kernel/src/erl_kernel_errors.erl | 12 ++ lib/kernel/test/code_coverage_SUITE.erl | 3 + 16 files changed, 641 insertions(+), 15 deletions(-) diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index c264082e7561..7e0961cd4fe4 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -111,6 +111,7 @@ atom asynchronous atom atom atom atom_used atom attributes +atom auto atom auto_connect atom await_exit atom await_microstate_accounting_modifications @@ -217,6 +218,7 @@ atom debug_flags atom decentralized_counters atom decimals atom default +atom debug_hash_fixed_number_of_locks atom delay_trap atom demonitor atom deterministic @@ -260,6 +262,7 @@ atom emulator atom enable_trace atom enabled atom endian +atom entry atom env atom ensure_at_least ensure_exactly atom eof @@ -486,6 +489,7 @@ atom new_processes atom new_ports atom new_uniq atom newline +atom nifs atom no atom nomatch atom none @@ -768,10 +772,9 @@ atom warning atom warning_msg atom wordsize atom write_concurrency +atom x atom xor atom x86 +atom y atom yes atom yield -atom nifs -atom auto -atom debug_hash_fixed_number_of_locks diff --git a/erts/emulator/beam/beam_bif_load.c b/erts/emulator/beam/beam_bif_load.c index 59bb67ebc6a4..15944c02f1f9 100644 --- a/erts/emulator/beam/beam_bif_load.c +++ b/erts/emulator/beam/beam_bif_load.c @@ -1251,6 +1251,138 @@ any_heap_refs(Eterm* start, Eterm* end, char* mod_start, Uint mod_size) return 0; } +BIF_RETTYPE code_get_debug_info_1(BIF_ALIST_1) +{ +#ifdef BEAMASM + ErtsCodeIndex code_ix; + Module* modp; + const BeamCodeHeader* hdr; + const BeamCodeLineTab* lt; + const BeamDebugTab* debug; + Sint i; + Uint alloc_size; + Eterm result = NIL; + Eterm* hp; + Eterm* hend; + + if (is_not_atom(BIF_ARG_1)) { + BIF_ERROR(BIF_P, BADARG); + } + code_ix = erts_active_code_ix(); + modp = erts_get_module(BIF_ARG_1, code_ix); + if (modp == NULL) { + BIF_ERROR(BIF_P, BADARG); + } + hdr = modp->curr.code_hdr; + if (hdr == NULL) { + BIF_ERROR(BIF_P, BADARG); + } + + lt = hdr->line_table; + + debug = hdr->debug; + if (debug == NULL) { + return am_none; + } + + alloc_size = 0; + + for (i = 0; i < debug->item_count; i++) { + /* [ {Line, {FrameSize, Pairs}} ] */ + alloc_size += 2 + 3 + 3; + /* Pairs = [{Name, Value}], where Value is an atom or 2-tuple. + * + * Assume they are all 2-tuples and HRelease() the excess + * later. */ + alloc_size += debug->items[i].num_vars * (2 + 3 + 3); + } + + hp = HAlloc(BIF_P, alloc_size); + hend = hp + alloc_size; + + for (i = debug->item_count-1; i >= 0; i--) { + BeamDebugItem* items = &debug->items[i]; + Sint frame_size = items->frame_size; + Uint num_vars = items->num_vars; + Eterm *tp = &items->first[2 * (num_vars - 1)]; + Uint32 location_index, location; + Eterm frame_size_term; + Eterm var_list = NIL; + Eterm tmp; + + location_index = items->location_index; + + if (location_index == ERTS_UINT32_MAX) { + continue; + } + if (lt->loc_size == 2) { + location = lt->loc_tab.p2[location_index]; + } else { + ASSERT(lt->loc_size == 4); + location = lt->loc_tab.p4[location_index]; + } + + switch (frame_size) { + case BEAMFILE_FRAMESIZE_ENTRY: + frame_size_term = am_entry; + break; + case BEAMFILE_FRAMESIZE_NONE: + frame_size_term = am_none; + break; + default: + ASSERT(frame_size >= 0); + frame_size_term = make_small(frame_size); + break; + } + + while (num_vars-- != 0) { + Eterm val; + Eterm tag; + + switch(loader_tag(tp[1])) { + case LOADER_X_REG: + tag = am_x; + val = make_small(loader_x_reg_index(tp[1])); + break; + case LOADER_Y_REG: + tag = am_y; + val = make_small(loader_y_reg_index(tp[1])); + break; + default: + tag = am_value; + val = tp[1]; + break; + } + tmp = TUPLE2(hp, tag, val); + hp += 3; + + tmp = TUPLE2(hp, tp[0], tmp); + hp += 3; + + tp -= 2; + + var_list = CONS(hp, tmp, var_list); + hp += 2; + } + + tmp = TUPLE2(hp, frame_size_term, var_list); + hp += 3; + + tmp = TUPLE2(hp, make_small(LOC_LINE(location)), tmp); + hp += 3; + + result = CONS(hp, tmp, result); + hp += 2; + } + + ASSERT(hp <= hend); + HRelease(BIF_P, hend, hp); + return result; +#endif + + BIF_ERROR(BIF_P, BADARG); +} + /* * Release of literal areas... * diff --git a/erts/emulator/beam/beam_code.h b/erts/emulator/beam/beam_code.h index 456d3cf2e840..f9a77b4b01ba 100644 --- a/erts/emulator/beam/beam_code.h +++ b/erts/emulator/beam/beam_code.h @@ -48,6 +48,7 @@ #define MD5_SIZE MD5_DIGEST_LENGTH typedef struct BeamCodeLineTab_ BeamCodeLineTab; +typedef struct BeamDebugTab_ BeamDebugTab; /* * Header of code chunks which contains additional information @@ -99,6 +100,11 @@ typedef struct beam_code_header { Uint32 *loc_index_to_cover_id; Uint line_coverage_len; + /* + * Debug information. debug->items are indexed directly by + * the index in each `debug_line` instruction. + */ + const BeamDebugTab *debug; #endif /* @@ -137,6 +143,21 @@ struct BeamCodeLineTab_ { const void** func_tab[1]; }; +/* + * Layout of the debug information. + */ +typedef struct { + Uint32 location_index; + Sint16 frame_size; + Uint16 num_vars; + Eterm *first; +} BeamDebugItem; + +struct BeamDebugTab_ { + Uint32 item_count; + BeamDebugItem *items; +}; + /* Total code size in bytes */ extern Uint erts_total_code_size; diff --git a/erts/emulator/beam/beam_file.c b/erts/emulator/beam/beam_file.c index 998391651ea6..64538b7a7bf9 100644 --- a/erts/emulator/beam/beam_file.c +++ b/erts/emulator/beam/beam_file.c @@ -668,6 +668,210 @@ static int parse_type_chunk(BeamFile *beam, IFF_Chunk *chunk) { } } +static int parse_debug_chunk_data(BeamFile *beam, BeamReader *p_reader) { + Sint32 count; + Sint32 total_num_vars; + int i; + BeamOpAllocator op_allocator; + BeamCodeReader *op_reader; + BeamOp* op = NULL; + BeamFile_DebugTable *debug = &beam->debug; + Eterm *tp; + byte *lp; + + LoadAssert(beamreader_read_i32(p_reader, &count)); + LoadAssert(beamreader_read_i32(p_reader, &total_num_vars)); + + beamopallocator_init(&op_allocator); + + op_reader = erts_alloc(ERTS_ALC_T_PREPARED_CODE, sizeof(BeamCodeReader)); + + op_reader->allocator = &op_allocator; + op_reader->file = beam; + op_reader->pending = NULL; + op_reader->first = 1; + op_reader->reader = *p_reader; + + if (count < 0 || total_num_vars < 0) { + goto error; + } + + debug->item_count = count; + debug->term_count = 2 * total_num_vars; + debug->items = erts_alloc(ERTS_ALC_T_PREPARED_CODE, + count * sizeof(BeamFile_DebugItem)); + debug->terms = erts_alloc(ERTS_ALC_T_PREPARED_CODE, + 2 * total_num_vars * sizeof(Eterm)); + debug->is_literal = erts_alloc(ERTS_ALC_T_PREPARED_CODE, + 2 * total_num_vars * sizeof(Eterm)); + + tp = debug->terms; + lp = debug->is_literal; + + for (i = 0; i < count; i++) { + BeamOpArg *arg; + int extra_args; + Sint32 num_vars; + + if (!beamcodereader_next(op_reader, &op)) { + goto error; + } + if (op->op != genop_call_2) { + goto error; + } + + debug->items[i].location_index = -1; + + arg = op->a; + + /* Process frame size. */ + switch (arg->type) { + case TAG_n: + debug->items[i].frame_size = BEAMFILE_FRAMESIZE_NONE; + break; + case TAG_a: + if (arg->val != am_entry) { + goto error; + } else { + debug->items[i].frame_size = BEAMFILE_FRAMESIZE_ENTRY; + } + break; + case TAG_u: + if (arg->val > ERTS_SINT32_MAX) { + goto error; + } + debug->items[i].frame_size = arg->val; + break; + default: + goto error; + } + + arg++; + + /* Get and check the number of extra arguments. */ + if (arg->type != TAG_u) { + goto error; + } + extra_args = arg->val; + + arg++; + + if (extra_args % 2 != 0) { + goto error; + } + + /* Process the list of variable mappings. */ + + num_vars = extra_args / 2; + if (num_vars > total_num_vars) { + goto error; + } + total_num_vars -= num_vars; + + debug->items[i].num_vars = num_vars; + debug->items[i].first = tp; + + while (extra_args > 0) { + Eterm var_name; + + switch (arg[0].type) { + case TAG_i: + *tp++ = make_small(arg[0].val); + *lp++ = 0; + break; + case TAG_q: + var_name = beamfile_get_literal(beam, arg[0].val); + if (is_not_bitstring(var_name) || + TAIL_BITS(bitstring_size(var_name))) { + goto error; + } + *tp++ = arg[0].val; + *lp++ = 1; + break; + default: + goto error; + } + + *lp = 0; + switch (arg[1].type) { + case TAG_i: + *tp = make_small(arg[1].val); + break; + case TAG_a: + *tp = arg[1].val; + break; + case TAG_n: + *tp = NIL; + break; + case TAG_x: + *tp = make_loader_x_reg(arg[1].val); + break; + case TAG_y: + *tp = make_loader_y_reg(arg[1].val); + break; + case TAG_q: + *tp = arg[1].val; + *lp = 1; + break; + default: + goto error; + } + + tp++, lp++; + arg += 2; + extra_args -= 2; + } + + beamopallocator_free_op(&op_allocator, op); + op = NULL; + } + + if (total_num_vars != 0) { + goto error; + } + + beamcodereader_close(op_reader); + beamopallocator_dtor(&op_allocator); + + return 1; + + error: + if (op != NULL) { + beamopallocator_free_op(&op_allocator, op); + } + + beamcodereader_close(op_reader); + beamopallocator_dtor(&op_allocator); + + if (debug->items) { + erts_free(ERTS_ALC_T_PREPARED_CODE, debug->items); + debug->items = NULL; + } + + if (debug->terms) { + erts_free(ERTS_ALC_T_PREPARED_CODE, debug->terms); + debug->terms = NULL; + } + + return 0; +} + +static int parse_debug_chunk(BeamFile *beam, IFF_Chunk *chunk) { + BeamReader reader; + Sint32 version; + + beamreader_init(chunk->data, chunk->size, &reader); + + LoadAssert(beamreader_read_i32(&reader, &version)); + + if (version == 0) { + return parse_debug_chunk_data(beam, &reader); + } else { + /* Silently ignore chunk of wrong version. */ + return 1; + } +} + static ErlHeapFragment *new_literal_fragment(Uint size) { ErlHeapFragment *bp; @@ -921,6 +1125,7 @@ beamfile_read(const byte *data, size_t size, BeamFile *beam) { MakeIffId('A', 't', 'o', 'm'), /* 11 */ MakeIffId('T', 'y', 'p', 'e'), /* 12 */ MakeIffId('M', 'e', 't', 'a'), /* 13 */ + MakeIffId('D', 'b', 'g', 'B'), /* 14 */ }; static const int UTF8_ATOM_CHUNK = 0; @@ -939,6 +1144,7 @@ beamfile_read(const byte *data, size_t size, BeamFile *beam) { static const int OBSOLETE_ATOM_CHUNK = 11; static const int TYPE_CHUNK = 12; static const int META_CHUNK = 13; + static const int DEBUG_CHUNK = 14; static const int NUM_CHUNKS = sizeof(chunk_iffs) / sizeof(chunk_iffs[0]); @@ -1036,6 +1242,13 @@ beamfile_read(const byte *data, size_t size, BeamFile *beam) { init_fallback_type_table(beam); } + if (chunks[DEBUG_CHUNK].size > 0) { + if (!parse_debug_chunk(beam, &chunks[DEBUG_CHUNK])) { + error = BEAMFILE_READ_CORRUPT_DEBUG_TABLE; + goto error; + } + } + beam->strings.data = chunks[STR_CHUNK].data; beam->strings.size = chunks[STR_CHUNK].size; @@ -1176,6 +1389,16 @@ void beamfile_free(BeamFile *beam) { beam->types.entries = NULL; } + if (beam->debug.items) { + erts_free(ERTS_ALC_T_PREPARED_CODE, beam->debug.items); + erts_free(ERTS_ALC_T_PREPARED_CODE, beam->debug.terms); + erts_free(ERTS_ALC_T_PREPARED_CODE, beam->debug.is_literal); + + beam->debug.items = NULL; + beam->debug.terms = NULL; + beam->debug.is_literal = NULL; + } + if (beam->static_literals.entries) { beamfile_literal_dtor(&beam->static_literals); } diff --git a/erts/emulator/beam/beam_file.h b/erts/emulator/beam/beam_file.h index f32f9db2670e..4898d35a488e 100644 --- a/erts/emulator/beam/beam_file.h +++ b/erts/emulator/beam/beam_file.h @@ -150,6 +150,24 @@ typedef struct { BeamType *entries; } BeamFile_TypeTable; +#define BEAMFILE_FRAMESIZE_ENTRY (-2) +#define BEAMFILE_FRAMESIZE_NONE (-1) + +typedef struct { + Uint32 location_index; + Sint32 frame_size; + Sint32 num_vars; + Eterm *first; +} BeamFile_DebugItem; + +typedef struct { + Sint32 item_count; + Sint32 term_count; + BeamFile_DebugItem *items; + Eterm *terms; + byte *is_literal; +} BeamFile_DebugTable; + typedef struct { IFF_File iff; @@ -166,6 +184,7 @@ typedef struct { BeamFile_LambdaTable lambdas; BeamFile_LineTable lines; BeamFile_TypeTable types; + BeamFile_DebugTable debug; /* Static literals are those defined in the file, and dynamic literals are * those created when loading. The former is positively indexed starting @@ -206,7 +225,8 @@ enum beamfile_read_result { BEAMFILE_READ_CORRUPT_LAMBDA_TABLE, BEAMFILE_READ_CORRUPT_LINE_TABLE, BEAMFILE_READ_CORRUPT_LITERAL_TABLE, - BEAMFILE_READ_CORRUPT_TYPE_TABLE + BEAMFILE_READ_CORRUPT_TYPE_TABLE, + BEAMFILE_READ_CORRUPT_DEBUG_TABLE }; typedef struct { diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 76fbf19a19d0..e0abed77a52a 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -170,6 +170,8 @@ erts_prepare_loading(Binary* magic, Process *c_p, Eterm group_leader, BeamLoadError0(stp, "corrupt locals table"); case BEAMFILE_READ_CORRUPT_TYPE_TABLE: BeamLoadError0(stp, "corrupt type table"); + case BEAMFILE_READ_CORRUPT_DEBUG_TABLE: + BeamLoadError0(stp, "corrupt BEAM debug information table"); case BEAMFILE_READ_SUCCESS: break; } diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 45c59fe9acb0..121c1ca4ba17 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -803,8 +803,10 @@ bif erts_trace_cleaner:check/0 bif erts_trace_cleaner:send_trace_clean_signal/1 # -# New in 28 +# New in 28. # bif erts_internal:system_monitor/1 bif erts_internal:system_monitor/3 bif erts_internal:processes_next/1 +bif code:get_debug_info/1 + diff --git a/erts/emulator/beam/jit/arm/instr_common.cpp b/erts/emulator/beam/jit/arm/instr_common.cpp index 271f6d637a66..007612c1ef49 100644 --- a/erts/emulator/beam/jit/arm/instr_common.cpp +++ b/erts/emulator/beam/jit/arm/instr_common.cpp @@ -181,7 +181,7 @@ void BeamModuleAssembler::emit_validate(const ArgWord &Arity) { # ifdef JIT_HARD_DEBUG emit_enter_runtime_frame(); - for (unsigned i = 0; i < arity.get(); i++) { + for (unsigned i = 0; i < Arity.get(); i++) { mov_arg(ARG1, ArgVal(ArgVal::XReg, i)); emit_enter_runtime(); @@ -3186,3 +3186,9 @@ void BeamModuleAssembler::emit_coverage(void *coverage, Uint index, Uint size) { ASSERT(0); } } + +void BeamModuleAssembler::emit_debug_line(const ArgWord &Loc, + const ArgWord &Index, + const ArgWord &Live) { + emit_validate(Live); +} diff --git a/erts/emulator/beam/jit/arm/ops.tab b/erts/emulator/beam/jit/arm/ops.tab index b17189647b69..79bfbc9b4c4a 100644 --- a/erts/emulator/beam/jit/arm/ops.tab +++ b/erts/emulator/beam/jit/arm/ops.tab @@ -91,7 +91,7 @@ line I executable_line I I -debug_line u u u => _ +debug_line I I t allocate t t allocate_heap t I t diff --git a/erts/emulator/beam/jit/asm_load.c b/erts/emulator/beam/jit/asm_load.c index 827d03c33fad..8911b199e9ba 100644 --- a/erts/emulator/beam/jit/asm_load.c +++ b/erts/emulator/beam/jit/asm_load.c @@ -706,6 +706,20 @@ int beam_load_emit_op(LoaderState *stp, BeamOp *tmp_op) { } break; } + case op_debug_line_IIt: { + BeamFile_DebugItem *items = stp->beam.debug.items; + Uint location_index = tmp_op->a[0].val; + Sint index = tmp_op->a[1].val - 1; + + if (add_line_entry(stp, location_index, 1)) { + goto load_error; + } + + ASSERT(items[index].location_index == -1); + items[index].location_index = stp->current_li - 1; + + break; + } case op_int_code_end: /* End of code found. */ if (stp->function_number != stp->beam.code.function_count) { @@ -859,6 +873,58 @@ static const BeamCodeLineTab *finish_line_table(LoaderState *stp, return line_tab_ro; } +static const BeamDebugTab *finish_debug_table(LoaderState *stp, + char *module_base, + size_t module_size) { + BeamFile_DebugTable *debug = &stp->beam.debug; + const BeamDebugTab *debug_tab_ro; + byte *debug_tab_rw_base; + BeamDebugTab *debug_tab_top; + Eterm *debug_tab_terms; + BeamDebugItem *debug_tab_items; + Uint item_count = debug->item_count; + Uint term_count = debug->term_count; + Uint i; + + if (item_count == 0) { + return NULL; + } + + debug_tab_ro = (const BeamDebugTab *)beamasm_get_rodata(stp->ba, "debug"); + debug_tab_rw_base = get_writable_ptr(stp->executable_region, + stp->writable_region, + debug_tab_ro); + debug_tab_top = (BeamDebugTab *)debug_tab_rw_base; + debug_tab_terms = (Eterm *)&debug_tab_top[1]; + debug_tab_items = (BeamDebugItem *)&debug_tab_terms[term_count]; + + debug_tab_top->item_count = debug->item_count; + debug_tab_top->items = debug_tab_items; + + for (i = 0; i < term_count; i++) { + if (debug->is_literal[i]) { + ASSERT(debug->is_literal[i] == 1); + debug_tab_terms[i] = + beamfile_get_literal(&stp->beam, debug->terms[i]); + } else { + ASSERT(debug->is_literal[i] == 0); + debug_tab_terms[i] = debug->terms[i]; + } + } + + for (i = 0; i < item_count; i++) { + Uint num_vars = debug->items[i].num_vars; + + debug_tab_items[i].location_index = debug->items[i].location_index; + debug_tab_items[i].frame_size = debug->items[i].frame_size; + debug_tab_items[i].num_vars = num_vars; + debug_tab_items[i].first = debug_tab_terms; + debug_tab_terms += 2 * num_vars; + } + + return debug_tab_ro; +} + int beam_load_finish_emit(LoaderState *stp) { const BeamCodeHeader *code_hdr_ro = NULL; BeamCodeHeader *code_hdr_rw = NULL; @@ -888,6 +954,17 @@ int beam_load_finish_emit(LoaderState *stp) { beamasm_embed_bss(stp->ba, "line", line_size); } + /* Calculate size of the load BEAM debug information. */ + if (stp->beam.debug.item_count > 0) { + BeamFile_DebugTable *debug = &stp->beam.debug; + Uint debug_size; + + debug_size = sizeof(BeamDebugTab); + debug_size += (Uint)debug->item_count * sizeof(BeamFile_DebugItem); + debug_size += (Uint)debug->term_count * sizeof(Eterm); + beamasm_embed_bss(stp->ba, "debug", debug_size); + } + /* Place the string table and, optionally, attributes here. */ beamasm_embed_rodata(stp->ba, "str", @@ -978,6 +1055,10 @@ int beam_load_finish_emit(LoaderState *stp) { * names are literal lists. */ code_hdr_rw->line_table = finish_line_table(stp, module_base, module_size); + /* Debug information must be added after moving literals, since literals + * are used extensively. */ + code_hdr_rw->debug = finish_debug_table(stp, module_base, module_size); + if (stp->beam.attributes.size) { const byte *attr = beamasm_get_rodata(stp->ba, "attr"); diff --git a/erts/emulator/beam/jit/x86/instr_common.cpp b/erts/emulator/beam/jit/x86/instr_common.cpp index 0ff14c57a7eb..c9bcca7c8c3d 100644 --- a/erts/emulator/beam/jit/x86/instr_common.cpp +++ b/erts/emulator/beam/jit/x86/instr_common.cpp @@ -3334,3 +3334,9 @@ void BeamModuleAssembler::emit_coverage(void *coverage, Uint index, Uint size) { ASSERT(0); } } + +void BeamModuleAssembler::emit_debug_line(const ArgWord &Loc, + const ArgWord &Index, + const ArgWord &Live) { + emit_validate(Live); +} diff --git a/erts/emulator/beam/jit/x86/ops.tab b/erts/emulator/beam/jit/x86/ops.tab index 3783a3c52ce7..bd91565bb42f 100644 --- a/erts/emulator/beam/jit/x86/ops.tab +++ b/erts/emulator/beam/jit/x86/ops.tab @@ -91,7 +91,7 @@ line I executable_line I I -debug_line u u u => _ +debug_line I I t allocate t t allocate_heap t I t diff --git a/lib/compiler/test/beam_debug_info_SUITE.erl b/lib/compiler/test/beam_debug_info_SUITE.erl index c01455a81586..11056f684b8d 100644 --- a/lib/compiler/test/beam_debug_info_SUITE.erl +++ b/lib/compiler/test/beam_debug_info_SUITE.erl @@ -65,6 +65,8 @@ end_per_group(_GroupName, Config) -> Config. smoke(_Config) -> + {ok, Peer, Node} = ?CT_PEER(#{}), + TestBeams0 = get_unique_beam_files(), TestBeams = compiler_beams() ++ TestBeams0, @@ -81,12 +83,21 @@ smoke(_Config) -> """, io:put_chars(S), - test_lib:p_run(fun do_smoke/1, TestBeams). + HasDbgSupport = erlang:system_info(emu_flavor) =:= jit, + + test_lib:p_run(fun(Beam) -> + do_smoke(Beam, Node, HasDbgSupport) + end, TestBeams), + + peer:stop(Peer), + + ok. + compiler_beams() -> filelib:wildcard(filename:join([code:lib_dir(compiler), "ebin", "*.beam"])). -do_smoke(Beam) -> +do_smoke(Beam, Node, HasDbgSupport) -> try {ok,{Mod,[{abstract_code,{raw_abstract_v1,Abstr0}}]}} = beam_lib:chunks(Beam, [abstract_code]), @@ -100,7 +111,38 @@ do_smoke(Beam) -> [beam_debug_info,dexp,binary,report_errors]), SrcVars = source_variables(Abstr), IndexToFunctionMap = abstr_debug_lines(Abstr), - DebugInfo = get_debug_info(Mod, Code), + + %% Retrieve the debug information in two different ways. + {DebugInfo,CookedDebugInfo} = get_debug_info(Mod, Code), + CookedDebugInfoSorted = lists:sort(CookedDebugInfo), + DebugInfoBif = case HasDbgSupport of + true -> + lists:sort(load_get_debug_info(Node, Mod, Code)); + false -> + %% No runtime support for debug info. + CookedDebugInfoSorted + end, + if + CookedDebugInfoSorted =:= DebugInfoBif -> + ok; + true -> + Z0 = lists:zip(CookedDebugInfoSorted, DebugInfoBif, + {pad, {short,short}}), + Z = lists:dropwhile(fun({A,B}) -> A =:= B end, Z0), + io:format("~p\n", [Z]), + io:format("~p\n", [CookedDebugInfoSorted]), + io:format("~p\n", [DebugInfoBif]), + + error(inconsistent_debug_info) + end, + + case Mod of + ?MODULE when HasDbgSupport -> + %% This module has been compiled with `beam_debug_info`. + CookedDebugInfoSorted = lists:sort(code:get_debug_info(Mod)); + _ -> + ok + end, {DbgVars,DbgLiterals} = debug_info_vars(DebugInfo, IndexToFunctionMap), @@ -207,6 +249,27 @@ family_difference(F0, F1) -> S3 = sofs:family_specification(SpecFun, S2), sofs:to_external(S3). +%% Load a module on a remote node and retrieve debug information. +load_get_debug_info(Node, Mod, Beam) -> + erpc:call(Node, + fun() -> + {module,Mod} = code:load_binary(Mod, "", Beam), + DebugInfo = code:get_debug_info(Mod), + + case Mod of + ?MODULE -> + %% Don't purge the module that this fun + %% is located in. + ok; + _ -> + %% Smoke test of purging a module with + %% debug information. + _ = code:delete(Mod), + _ = code:purge(Mod) + end, + DebugInfo + end). + %% %% Extract variables mentioned in the source code. Try to remove %% variables that will never show up in the debug information; for @@ -456,7 +519,8 @@ abstr_extract_debug_lines(_, Acc) -> Acc. %%% get_debug_info(Mod, Beam) -> {ok,{Mod,[{"DbgB",DebugInfo0}, - {atoms,Atoms0}]}} = beam_lib:chunks(Beam, ["DbgB",atoms]), + {atoms,Atoms0}, + {"Line",Lines0}]}} = beam_lib:chunks(Beam, ["DbgB",atoms,"Line"]), Atoms = maps:from_list(Atoms0), Literals = case beam_lib:chunks(Beam, ["LitT"]) of {ok,{Mod,[{"LitT",Literals0}]}} -> @@ -470,8 +534,39 @@ get_debug_info(Mod, Beam) -> _NumVars:32, DebugInfo1/binary>> = DebugInfo0, 0 = Version, - DebugInfo = decode_debug_info(DebugInfo1, Literals, Atoms, Op), - lists:zip(lists:seq(1, length(DebugInfo)), DebugInfo). + RawDebugInfo0 = decode_debug_info(DebugInfo1, Literals, Atoms, Op), + RawDebugInfo = lists:zip(lists:seq(1, length(RawDebugInfo0)), RawDebugInfo0), + + %% The cooked debug info has line numbers instead of indices. + Lines = decode_line_table(Lines0, Literals, Atoms), + {beam_file,Mod,_Exp,_Attr,_Opts,Fs} = beam_disasm:file(Beam), + DebugMap = #{Index => LocationIndex || + {function,_Name,_Arity,_Entry,Is} <:- Fs, + {debug_line,LocationIndex,Index,_Live} <- Is}, + CookedDebugInfo = + [{map_get(map_get(Index, DebugMap), Lines),Info} || + {Index,Info} <:- RawDebugInfo, + is_map_key(Index, DebugMap)], + + {RawDebugInfo,CookedDebugInfo}. + +decode_line_table(<<0:32,_Bits:32,_NumIs:32,NumLines:32, + _NumFnames:32, Lines0/binary>>, + Literals, Atoms) -> + Lines = decode_line_tab_1(Lines0, Literals, Atoms, NumLines), + #{K => V || {K,V} <:- lists:zip(lists:seq(1, length(Lines)), Lines)}. + +decode_line_tab_1(_Lines, _Literals, _Atoms, 0) -> + []; +decode_line_tab_1(Lines0, Literals, Atoms, N) -> + case decode_arg(Lines0, Literals, Atoms) of + {{atom,_},Lines1} -> + decode_line_tab_1(Lines1, Literals, Atoms, N); + {{integer,Line},Lines1} -> + [Line|decode_line_tab_1(Lines1, Literals, Atoms, N - 1)]; + {nil,Lines1} -> + decode_line_tab_1(Lines1, Literals, Atoms, N) + end. decode_literal_table(<<0:32,N:32,Tab/binary>>) -> #{Index => binary_to_term(Literal) || diff --git a/lib/kernel/src/code.erl b/lib/kernel/src/code.erl index 89ab3433e126..5740038a1928 100644 --- a/lib/kernel/src/code.erl +++ b/lib/kernel/src/code.erl @@ -403,7 +403,8 @@ common reasons. module_status/0, module_status/1, modified_modules/0, - get_mode/0]). + get_mode/0, + get_debug_info/1]). -removed({rehash,0,"the code path cache feature has been removed"}). -removed({is_module_native,1,"HiPE has been removed"}). @@ -419,6 +420,18 @@ common reasons. -type coverage_mode() :: 'none' | 'function' | 'function_counters' | 'line_coverage' | 'line_counters'. +-export_type([debug_line/0, debug_frame/0, debug_name/0, debug_source/0, + debug_value/0, debug_info/0]). + +-nominal debug_line() :: pos_integer(). +-nominal debug_frame() :: non_neg_integer() | 'entry' | 'none'. +-nominal debug_name() :: binary() | 1..255. +-nominal debug_source() :: {'x',non_neg_integer()} + | {'y',non_neg_integer()} + | {value, _}. +-nominal debug_value() :: {debug_name(), debug_source()}. +-nominal debug_info() :: [{debug_line(), {debug_frame(), [debug_value()]}}]. + -export([coverage_support/0, get_coverage/2, get_coverage_mode/0, @@ -2326,3 +2339,10 @@ _See also:_ [Native Coverage Support](#module-native-coverage-support) Supported :: boolean(). coverage_support() -> erlang:nif_error(undefined). + +-doc(#{since => <<"OTP 28.0">>}). +-spec get_debug_info(Module) -> DebugInfo when + Module :: module(), + DebugInfo :: debug_info(). +get_debug_info(_Module) -> + erlang:nif_error(undefined). diff --git a/lib/kernel/src/erl_kernel_errors.erl b/lib/kernel/src/erl_kernel_errors.erl index 59a55b8590ee..98f5a76c3878 100644 --- a/lib/kernel/src/erl_kernel_errors.erl +++ b/lib/kernel/src/erl_kernel_errors.erl @@ -81,6 +81,18 @@ format_code_error(get_coverage_mode, [Module]) -> end end] end); +format_code_error(get_debug_info, [Module]) -> + [if + not is_atom(Module) -> + not_atom; + true -> + case erlang:module_loaded(Module) of + false -> + module_not_loaded; + true -> + ~"this runtime system does not support the native debug API" + end + end]; format_code_error(reset_coverage, [Module]) -> coverage( fun () -> diff --git a/lib/kernel/test/code_coverage_SUITE.erl b/lib/kernel/test/code_coverage_SUITE.erl index 0ccdb57c9b2f..88c17c4f2656 100644 --- a/lib/kernel/test/code_coverage_SUITE.erl +++ b/lib/kernel/test/code_coverage_SUITE.erl @@ -273,6 +273,9 @@ error_info(_Config) -> {get_coverage, [cover_line_id,NotLoaded]}, {get_coverage, [whatever,?MODULE]}, + {get_debug_info, [42]}, + {get_debug_info, [NotLoaded]}, + {reset_coverage, [42]}, {reset_coverage, [NotLoaded]}, {reset_coverage, [?MODULE]},