添加

5 years ago · 7d2ddc02f4
--- a/rebar.config
+++ b/rebar.config
@ -1,2 +1,2 @@
 {erl_opts, [debug_info]}.
 {erl_opts, [no_debug_info]}.
 {deps, []}.
--- a/src/docs/erlang数据结构相关.md
+++ b/src/docs/erlang数据结构相关.md
@ -0,0 +1,280 @@
 # erlang各种数据类型占用的内存大小
    有效编程的一个好的开始是知道不同数据类型和操作需要多少内存。
    Erlang数据类型和其他项目消耗多少内存与实现方式有关，但是下表显示了OTP 19.0中erts-8.0系统的一些数据。
    度量单位是存储字。 同时存在32位和64位实现。 因此，一个字分别是4字节或8字节。
    erlang:system_info(wordsize).
    
 ```
 Data Type       	Memory Size

 Small integer
                    1 word.
                    On 32-bit architectures: -134217729 < i < 134217728
                    (28 bits).
                    On 64-bit architectures: -576460752303423489 < i <
                    576460752303423488 (60 bits).

 Large integer
                    3..N words.

 Atom	
                    1 word.
                    An atom refers into an atom table, which also consumes
                    memory. The atom text is stored once for each unique
                    atom in this table. The atom table is not garbage-collected.

 Float	
                    On 32-bit architectures: 4 words.
                    On 64-bit architectures: 3 words.

 Binary	
                    3..6 words + data (can be shared).

 List	
                    1 word + 1 word per element + the size of each element.

 String (is the same as a list of integers)
                    1 word + 2 words per character.

 Tuple
                	2 words + the size of each element.

 Small Map
                	5 words + the size of all keys and values.

 Large Map (> 32 keys)
                	N x F words + the size of all keys and values.
                    N is the number of keys in the Map.
                    F is a sparsity factor that can vary 
                    between 1.6 and 1.8 due to the probabilistic nature of
                    the internal HAMT data structure.

 Pid	
                    1 word for a process identifier from the current local
                    node + 5 words for a process identifier from another
                    node.
                    A process identifier refers into a process table and a
                    node table, which also consumes memory.                    

 Port	            
                    1 word for a port identifier from the current local node +
                    5 words for a port identifier from another node.
                    A port identifier refers into a port table and a node table,
                    which also consumes memory.

 Reference
                    On 32-bit architectures: 5 words for a reference from
                    the current local node + 7 words for a reference from
                    another node.
                    On 64-bit architectures: 4 words for a reference from
                    the current local node + 6 words for a reference from
                    another node.
                    A reference refers into a node table, which also
                    consumes memory.
                   
 Fun	
                    9..13 words + the size of environment.
                    A fun refers into a fun table, which also consumes
                    memory.

 Ets table	        
                    Initially 768 words + the size of each element (6 words
                    + the size of Erlang data). The table grows when
                    necessary.  
  
 Erlang process	
                    338 words when spawned, including a heap of 233 words.
 ```

 # System Limits
    Erlang语言规范对进程数，原子长度等没有任何限制。 但是，出于性能和内存节省的原因，在Erlang语言和执行环境的实际实现中始终会受到限制。
 ```
 Processes 
            The maximum number of simultaneously alive Erlang
            processes is by default 262,144. This limit can be
            configured at startup. For more information, see the
            +P command-line flag in the erl(1) manual page in
            ERTS.  

 Known nodes
           A remote node Y must be known to node X if there
           exists any pids, ports, references, or funs (Erlang data
           types) from Y on X, or if X and Y are connected. The
           maximum number of remote nodes simultaneously/ever
           known to a node is limited by the maximum number of
           atoms available for node names. All data concerning
           remote nodes, except for the node name atom, are
           garbage-collected.  

 Connected nodes
           The maximum number of simultaneously connected
           nodes is limited by either the maximum number of
           simultaneously known remote nodes, the maximum  
           number of (Erlang) ports available, or the maximum
           number of sockets available. 

 Characters in an atom
          255.

 Atoms
          By default, the maximum number of atoms is 1,048,576.
          This limit can be raised or lowered using the +t option.

 Elements in a tuple
          The maximum number of elements in a tuple is
          16,777,215 (24-bit unsigned integer).

 Size of binary  
          In the 32-bit implementation of Erlang, 536,870,911
          bytes is the largest binary that can be constructed
          or matched using the bit syntax. In the 64-
          bit implementation, the maximum size is
          2,305,843,009,213,693,951 bytes. If the limit
          is exceeded, bit syntax construction fails with a
          system_limit exception, while any attempt to
          match a binary that is too large fails. This limit is
          enforced starting in R11B-4.
          In earlier Erlang/OTP releases, operations on too large
          binaries in general either fail or give incorrect results.
          In future releases, other operations that create binaries
          (such as list_to_binary/1) will probably also
          enforce the same limit.

 Total amount of data allocated by an Erlang node
           The Erlang runtime system can use the complete 32-bit
           (or 64-bit) address space, but the operating system often
           limits a single process to use less than that.

 Length of a node name
            An Erlang node name has the form host@shortname
            or host@longname. The node name is used as an atom
            within the system, so the maximum size of 255 holds
            also for the node name.

 Open ports
           The maximum number of simultaneously open Erlang
           ports is often by default 16,384. This limit can be
           configured at startup. For more information, see the
           +Q command-line flag in the erl(1) manual page in
           ERTS. 

 Open files and sockets
          同时打开的文件和套接字的最大数量取决于可用的Erlang端口的最大数量，以及特定于操作系统的设置和限制。 

 Number of arguments to a function or fun
          255

 Unique References on a Runtime System Instance
            Each scheduler thread has its own set of references,
            and all other threads have a shared set of references.
            Each set of references consist of 2^64 - 1 unique
            references. That is, the total amount of unique
            references that can be produced on a runtime system
            instance is (NoSchedulers + 1) × (2^64 - 1).
            If a scheduler thread create a new reference each nano
            second, references will at earliest be reused after more
            than 584 years. That is, for the foreseeable future they
            are unique enough.

 Unique Integers on a Runtime System Instance
            There are two types of unique integers both created
            using the erlang:unique_integer() BIF:
            1. Unique integers created with the monotonic
            modifier consist of a set of 2^64 - 1 unique integers.
            2. Unique integers created without the monotonic
            modifier consist of a set of 2^64 - 1 unique integers
            per scheduler thread and a set of 2^64 - 1 unique
            integers shared by other threads. That is, the total
            amount of unique integers without the monotonic
            modifier is (NoSchedulers + 1) × (2^64 - 1).
            If a unique integer is created each nano second, unique
            integers will at earliest be reused after more than 584
            years. That is, for the foreseeable future they are unique
            enough.
 ```  

 # Erlang标准数据结构的选择
  实际上，Erlang程序使用列表（本机或通过dict）来处理涉及多达数百个元素的数据结构，并使用ETS（Erlang术语存储）或mnesia来处理更大的数据。
  ETS使用散列来允许几乎恒定时间访问几乎任意数量的数据。
  对于由几个（几十个或几百个）项组成的数据集合，列表通常要优于ETS和树。 对于大量小物品，ETS往往效果最好。 对于较大的项目，平衡树可以胜过ETS，因为它们避免了复制数据。
  Suggestion：
  elements count: 0 － 100 | 100 - 10000  |  10000 -
  our select    :  list   |      ets     |  gb_tree
  
 # Erlang 常用数据结构实现
    erlang虚拟机中用Eterm表示所有的类型的数据，具体的实施方案通过占用Eterm的后几位作为类型标签，然后根据标签类型来解释剩余位的用途。这个标签是多层级的，最外层占用两位，有三种类型： 
        list，剩下62位是指向列表Cons的指针
        boxed对象，即复杂对象，剩余62位指向boxed对象的对象头。包括元组，大整数，外部Pid/Port等
        immediate立即数，即可以在一个字中表示的小型对象，包括小整数，本地Pid/Port，Atom，NIL等
        
        这三种类型是Erlang类型的大框架，前两者是可以看做是引用类型，立即数相当于是值类型，但无论对于哪种类型，Erlang Eterm本身只占用一个字，理解这一点是很重要的。
        对于二三级标签的细分和编码，一般我们无需知道这些具体的底层细节，以下是几种常用的数据结构实现方式。
    一. 常用类型
    1. atom  
        atom用立即数表示，在Eterm中保存的是atom在全局atom表中的索引，依赖于高效的哈希和索引表，Erlang的atom比较和匹配像整数一样高效。atom表是不回收的，并且默认最大值为1024*1024，超过这个限制Erlang虚拟机将会崩溃，可通过+t参数调整该上限。      
    2.Pid/Port
        在R9B之后，随着进程数量增加和其它因素，Pid只在32位中表示本地Pid(A=0)，将32位中除了4位Tag之外的28位，都可用于进程Pid表示，
        出于Pid表示的历史原因，仍然保留三段式的显示，本地Pid表示变成了<0, Pid低15位, Pid高13位>。对于外部Pid，采用boxed复合对象表示，
        在将本地Pid发往其它node时，Erlang会自动将为Pid加上本地节点信息，并打包为一个boxed对象，占用6个字。另外，Erlang需要维护Pid表，
        每个条目占8个字节，当进程数量过大时，Pid表将占用大量内存，Erlang默认可以使用18位有效位来表示Pid(262144)，可通过+P参数调节，
        最大值为27位(2^27-1)，此时Pid表占用内存为2G。
    3. lists
       列表以标签01标识，剩余62位指向列表的Cons单元，Cons是[Head|Tail]的组合，在内存中体现为两个相邻的Eterm，Head可以是任何类型的Eterm，
       Tail是列表类型的Eterm。因此形如L2 = [Elem|L1]的操作，实际上构造了一个新的Cons，其中Head是Elem Eterm，Tail是L1 Eterm，然后将L2的Eterm指向了这个新的Cons，
       因此L2即代表了这个新的列表。对于[Elem|L2] = L1，实际上是提出了L1 Eterm指向的Cons，将Head部分赋给Elem，Tail部分赋给L2，
       注意Tail本身就是个List的Eterm，因此list是单向列表，并且构造和提取操作是很高效的。需要再次注意的是，Erlang所有类型的Eterm本身只占用一个字大小。
       这也是诸如list,tuple能够容纳任意类型的基础。
       
       Erlang中进程内对对象的重复引用只需占用一份对象内存(只是Eterm本身一个字的拷贝)，但是在对象跨进程时，对象会被展开，执行深度拷贝。
         
    4. tuple
       tuple属于boxed对象的一种，每个boxed对象都有一个对象头(header)，boxed Eterm即指向这个header，这个header里面包含具体的boxed对象类型，
       如tuple的header末6位为000000，前面的位数为tuple的size： 
       tuple实际上就是一个有头部的数组，其包含的Eterm在内存中紧凑排列，tuple的操作效率和数组是一致的。
       list，tuple中添加元素，实际上都是在拷贝Eterm本身，Erlang虚拟机会追踪这些引用，并负责垃圾回收。
    5. binary
       Erlang binary用于处理字节块，Erlang其它的数据结构(list,tuple,record)都是以Eterm为单位的，用于处理字节块会浪费大量内存
       ，如”abc”占用了7个字(加上ETerm本身)，binary为字节流提供一种操作高效，占用空间少的解决方案。
       
       之前我们介绍的数据结构都存放在Erlang进程堆上，进程内部可以使用对象引用，在对象跨进程传输时，会执行对象拷贝。
       为了避免大binary跨进程传输时的拷贝开销，Erlang针对binary作出了优化，将binary分为小binary和大binary。
       heap binary   
            不超过64字节(定义于erl_binary.h ERL_ONHEAP_BIN_LIMIT宏)的小binary直接创建在进程堆上，称为heap binary，heap binary是一个boxed对象：  
       refc binary
            大于64字节的binary将创建在Erlang虚拟机全局堆上，称为refc binary(reference-counted binary)，可被所有Erlang进程共享，
            这样跨进程传输只需传输引用即可，虚拟机会对binary本身进行引用计数追踪，以便GC。refc binary需要两个部分来描述，
            位于全局堆的refc binary数据本身和位于进程堆的binary引用(称作proc binary)，这两种数据结构定义于global.h中。
            下图描述refc binary和proc binary的关系：
            所有的OffHeap(进程堆之外的数据)被组织为一个单向链表，进程控制块(erl_process.h struct process)中的off_heap字段维护链表头和所有OffHeap对象的总大小，
            当这个大小超过虚拟机阀值时，将导致一次强制GC。注意，refc binary只是OffHeap对象的一种，以后可扩展其它种类。
       sub binary
            sub binary是Erlang为了优化binary分割(如split_binary/2)而引入的，由于Erlang变量不可变语义，拷贝分割后的binary是效率比较低下的做法，Erlang通过sub binary来复用原有binary。
       bit string
            当我们通过如<<2:3,3:6>>的位语法构建binary时，将得到<<65,1:1>>这种非字节对齐的数据，即二进制流，
            在Erlang中被称为bitstring，Erlang的bitstring基于ErlSubBin结构实现，此时bitsize为最后一个字节的有效位数，
            size为有效字节数(不包括未填满的最后一个字节)，对虚拟机底层来说，sub binary和bit string是同一种数据结构。
            
    ## 复合类型
     1. record
       这个类型无需过多介绍，它就是一个tuple，所谓record field在预编译后实际上都是通过数值下标来索引，因此它访问field是O(1)复杂度的。        
     2. map
        该结构体之后就是依次存放的Value，因此maps的get操作，需要先遍历keys tuple，找到key所在下标，然后在value中取出该下标偏移对应的值。因此是O(n)复杂度的。详见maps:get源码($BEAM_SRC/erl_map.c erts_maps_get)。
        
        如此的maps，只能作为record的替用，并不是真正的Key->Value映射，因此不能存放大量数据。而在OTP18中，maps加入了针对于big map的hash机制，
        当maps:size < MAP_SMALL_MAP_LIMIT时，使用flatmap结构，也就是上述OTP17中的结构，当maps:size >= MAP_SMALL_MAP_LIMIT时，
        将自动使用hashmap结构来高效存取数据。MAP_SMALL_MAP_LIMIT在erl_map.h中默认定义为32。
        
        仍然要注意Erlang本身的变量不可变原则，每次执行更新maps，都会导致新开辟一个maps，并且拷贝原maps的keys和values，在这一点上，maps:update比maps:put更高效，因为前者keys数量不会变，因此无需开辟新的keys tuple，拷贝keys tuples ETerm即可。实际使用maps时：
        
        更新已有key值时，使用update(:=)而不是put(=>)，不仅可以检错，并且效率更高
        当key/value对太多时，对其进行层级划分，保证其拷贝效率
        实际测试中，OTP18中的maps在存取大量数据时，效率还是比较高的，这里有一份maps和dict的简单测试函数，可通过OTP17和OTP18分别运行来查看效率区别。通常情况下，我们应当优先使用maps，比起dict，它在模式匹配，mongodb支持，可读性上都有很大优势。    
     
     3. array
        array下标从0开始
        array有两种模式，一种固定大小，另一种按需自动增长大小，但不会自动收缩
        支持稀疏存储，执行array:set(100,value,array:new())，那么[0,99]都会被设置为默认值(undefined)，该默认值可修改。
        在实现上，array最外层被包装为一个record:
     ... 其他等待被添加   
--- a/src/dynamicCompile/utKvsToBeam.erl
+++ b/src/dynamicCompile/utKvsToBeam.erl
@ -2,6 +2,7 @@

 -export([
    load/2
    , beamToSrc/1
 ]).

 %% 注意 map类型的数据不能当做key
@ -40,3 +41,16 @@ lookup_clauses([], Acc) ->
    lists:reverse(lists:flatten([lookup_clause_anon() | Acc]));
 lookup_clauses([{Key, Value} | T], Acc) ->
    lookup_clauses(T, [lookup_clause(Key, Value) | Acc]).

 %% Regenerates "<Module>.erl" in the current working directory from the
 %% abstract code stored in the beam file that code:which/1 reports for Module.
 %% The beam must have been compiled with debug_info; otherwise no abstract
 %% code is available and the fall-through error branch is taken.
 %% Always returns ok: the outcome (success or failure reason) is printed.
 beamToSrc(Module) ->
    case beam_lib:chunks(code:which(Module), [abstract_code]) of
        {ok, {_, [{abstract_code, {_, AC}}]}} ->
            Code = erl_prettypr:format(erl_syntax:form_list(AC)),
            FileName = lists:concat([Module, ".erl"]),
            %% Report success only when the source file was really written.
            case file:write_file(FileName, list_to_binary(Code)) of
                ok ->
                    io:format("build beam:~p to erl:~p success.~n", [Module, Module]);
                {error, WriteReason} ->
                    io:format("code_gen_erl_file error, reason:~p~n",
                              [{write_file, FileName, WriteReason}])
            end;
        {error, beam_lib, Reason} ->
            io:format("code_gen_erl_file error, reason:~p~n", [Reason]);
        Err ->
            %% e.g. the abstract_code chunk is present but empty (no debug_info).
            io:format("code_gen_erl_file error, reason:~p~n", [Err])
    end.
--- a/src/srvNodeMgr/main.erl
+++ b/src/srvNodeMgr/main.erl
@ -0,0 +1,149 @@
 -module(main).

 -export([
    server_start/0,
    server_stop/0,
    server_stop/1,
    is_running/0, 
    is_running/1,
    reload/0,
    info/0,
    psl/0,
    psl/1,
    psl/2,
    get_info/0
 ]).

 -define(SERVER_APPS, [sasl, crypto, inets, ranch, os_mon, hot]).

 %% Boots the server: loads the `hot` application, runs app_misc:init/0 and
 %% then starts every application listed in ?SERVER_APPS.
 %% Crashes with badmatch if loading or starting does not return ok.
 server_start() ->
    ok = application:load(hot),
    app_misc:init(),
    Apps = ?SERVER_APPS,
    ok = start_applications(Apps).

 %% Hot code reload entry point; delegates to reloader:reload_all/0 and
 %% returns whatever that call returns.
 reload() ->
    Result = reloader:reload_all(),
    Result.

 %% Stop the game server, passing 30 as the argument of server_stop/1.
 server_stop() ->
    DefaultSeconds = 30,
    server_stop(DefaultSeconds).

 %% Same check as is_running/1, applied to the local node.
 is_running() ->
    LocalNode = node(),
    is_running(LocalNode).

 %% Asks node_misc whether a process registered as `server` exists on Node.
 is_running(Node) ->
    RegisteredName = server,
    node_misc:is_process_running(Node, RegisteredName).

 %% Stop the game server: calls app_misc:pause_accept/0 and then stops the
 %% applications in ?SERVER_APPS. The argument is currently unused.
 %% Returns ok.
 server_stop(_SleepSeconds) ->
    app_misc:pause_accept(),
    Apps = ?SERVER_APPS,
    stop_applications(Apps),
    ok.

 %% Prints the statistics gathered by get_info/0 to standard output.
 %% The format string consumes the seven values in the order get_info/0
 %% returns them. Returns ok.
 info() ->
    Stats = get_info(),
    io:format( "abormal termination:
        ~n   Scheduler id:                         ~p
        ~n   Num scheduler:                        ~p
        ~n   Process count:                        ~p
        ~n   Process limit:                        ~p
        ~n   Memory used by erlang processes:      ~p
        ~n   Memory allocated by erlang processes: ~p
        ~n   The total amount of memory allocated: ~p
        ~n",
        Stats),
    ok.
 %% Collects a snapshot of VM statistics as a seven-element list:
 %% [scheduler_id, schedulers, process_count, process_limit] read through
 %% erlang:system_info/1, followed by [processes_used, processes, total]
 %% read through erlang:memory/1.
 get_info() ->
    SystemItems = [scheduler_id, schedulers, process_count, process_limit],
    MemoryItems = [processes_used, processes, total],
    SystemStats = [erlang:system_info(Item) || Item <- SystemItems],
    MemoryStats = [erlang:memory(Item) || Item <- MemoryItems],
    SystemStats ++ MemoryStats.

 %% psl/1 with a message-queue threshold of 100.
 psl() ->
    DefaultThreshold = 100,
    psl(DefaultThreshold).

 %% Scans all processes for the first one whose message queue is longer than
 %% Num. That process (only the first hit) is dumped with
 %% print_process_info/1 and false is returned; true is returned when no
 %% process exceeds the threshold.
 psl(Num) ->
    WithinLimit =
        fun(Pid) ->
                case erlang:process_info(Pid, message_queue_len) of
                    {message_queue_len, Len} when Len > Num ->
                        print_process_info(Pid),
                        false;
                    _ ->
                        %% Short queue, or the process is already gone.
                        true
                end
        end,
    lists:all(WithinLimit, erlang:processes()).

 %% Dumps ProcessPid with print_process_info/1 when its message queue is
 %% longer than Num; otherwise (or when no queue info is returned) yields ok.
 psl(ProcessPid, Num) ->
    QueueInfo = erlang:process_info(ProcessPid, message_queue_len),
    case QueueInfo of
        {message_queue_len, Len} when Len > Num ->
            print_process_info(ProcessPid);
        _ ->
            ok
    end.

 %% Prints the pid P followed by nine erlang:process_info/2 items, one per
 %% line, in the order listed in Items. Returns ok.
 print_process_info(P) ->
    Items = [registered_name, current_function, message_queue_len,
             status, suspending, last_calls, links, dictionary,
             current_stacktrace],
    Details = [erlang:process_info(P, Item) || Item <- Items],
    io:format("~n~n=====process info===~n"
              "~p~n~p~n~p~n~p~n~p~n~p~n~p~n~p~n~p~n~p~n~n~n",
              [P | Details]).

 %%############ helper functions ##############
 %% Applies Do to every application in Apps, walking the list with Iterate
 %% (a fold such as lists:foldl/3) and collecting the ones that returned ok.
 %%   SkipError - an atom or a list of atoms; {error, {Tag, _}} results whose
 %%               Tag matches are ignored.
 %%   Undo      - applied to every application handled so far before aborting.
 %%   ErrorTag  - tag used in the thrown {error, {ErrorTag, App, Reason}}.
 %% Returns ok, or throws after printing the failing application.
 manage_applications(Iterate, Do, Undo, SkipError, ErrorTag, Apps) ->
    Abort =
        fun(FailedApp, Why, Done) ->
                io:format("App ~p, Reason ~p~n", [FailedApp, Why]),
                lists:foreach(Undo, Done),
                throw({error, {ErrorTag, FailedApp, Why}})
        end,
    Step =
        fun(App, Done) ->
                case Do(App) of
                    ok ->
                        [App | Done];
                    {error, {SkipError, _}} when is_atom(SkipError) ->
                        Done;
                    {error, {Tag, Reason}} when is_list(SkipError) ->
                        case lists:member(Tag, SkipError) of
                            true  -> Done;
                            false -> Abort(App, Reason, Done)
                        end;
                    {error, Reason} ->
                        Abort(App, Reason, Done)
                end
        end,
    Iterate(Step, [], Apps),
    ok.

 %% Starts Apps from left to right. already_started and
 %% cannot_start_application errors are skipped; any other failure stops the
 %% applications started so far and throws
 %% {error, {cannot_start_application, App, Reason}}.
 start_applications(Apps) ->
    Walk = fun lists:foldl/3,
    Start = fun application:start/1,
    Rollback = fun application:stop/1,
    Ignored = [already_started, cannot_start_application],
    manage_applications(Walk, Start, Rollback, Ignored,
                        cannot_start_application, Apps).

 %% Stops Apps from right to left (reverse start order). not_started errors
 %% are skipped; any other failure restarts the applications stopped so far
 %% and throws {error, {cannot_stop_application, App, Reason}}.
 stop_applications(Apps) ->
    io:format("stop_applications stopping.~n",[]),
    Walk = fun lists:foldr/3,
    Stop = fun application:stop/1,
    Rollback = fun application:start/1,
    manage_applications(Walk, Stop, Rollback, not_started,
                        cannot_stop_application, Apps).


--- a/src/srvNodeMgr/node_misc.erl
+++ b/src/srvNodeMgr/node_misc.erl
@ -0,0 +1,59 @@
 -module(node_misc).

 -export([names/1, make/1, parts/1, cookie_hash/0,
         is_running/2, is_process_running/2]).

 -define(EPMD_TIMEOUT, 30000).

 %% Runs net_adm:names(Hostname) in a monitored helper process that is killed
 %% with reason `timeout` after ?EPMD_TIMEOUT milliseconds.
 %% Returns the result of net_adm:names/1, or {error, Reason} when the helper
 %% process dies first (Reason is `timeout` when the timer killed it).
 names(Hostname) ->
    Self = self(),
    Ref = make_ref(),
    {Pid, MRef} = spawn_monitor(
                    fun () ->
                            Self ! {Ref, net_adm:names(Hostname)}
                    end),
    Timer = timer:exit_after(?EPMD_TIMEOUT, Pid, timeout),
    %% Drop the pending kill timer once the outcome is known, so it does not
    %% linger in the timer server until the timeout expires.
    CancelTimer = fun() ->
                          case Timer of
                              {ok, TRef} -> timer:cancel(TRef);
                              _ -> ok
                          end
                  end,
    receive
        {Ref, Names} ->
            erlang:demonitor(MRef, [flush]),
            CancelTimer(),
            Names;
        {'DOWN', MRef, process, Pid, Reason} ->
            CancelTimer(),
            {error, Reason}
    end.

 %% Builds a node name atom. Accepts either a {Prefix, Suffix} pair of
 %% strings, joined as Prefix@Suffix, or anything parts/1 accepts, which is
 %% split first and then joined.
 make({Prefix, Suffix}) ->
    NodeStr = Prefix ++ "@" ++ Suffix,
    list_to_atom(NodeStr);
 make(NodeStr) ->
    Parts = parts(NodeStr),
    make(Parts).

 %% Splits a node name (atom or string) at the first `@` into
 %% {Prefix, Suffix}. When the name contains no `@`, the suffix of the local
 %% node's name is used instead.
 parts(Node) when is_atom(Node) ->
    parts(atom_to_list(Node));
 parts(NodeStr) ->
    NotAt = fun(Char) -> Char =/= $@ end,
    case lists:splitwith(NotAt, NodeStr) of
        {Name, [$@ | Host]} ->
            {Name, Host};
        {Name, []} ->
            {_, LocalHost} = parts(node()),
            {Name, LocalHost}
    end.

 %% Returns the base64-encoded (string) MD5 digest of the node's cookie.
 cookie_hash() ->
    Cookie = atom_to_list(erlang:get_cookie()),
    Digest = erlang:md5(Cookie),
    base64:encode_to_string(Digest).

 %% True when Application appears as a key in the list returned by
 %% app_utils:which_applications/0 on Node; false when it does not or when
 %% the rpc fails.
 is_running(Node, Application) ->
    Reply = rpc:call(Node, app_utils, which_applications, []),
    case Reply of
        {badrpc, _} ->
            false;
        Apps ->
            proplists:is_defined(Application, Apps)
    end.

 %% True when the name Process is registered to a pid on Node; false when it
 %% is unregistered or the rpc fails.
 is_process_running(Node, Process) ->
    case rpc:call(Node, erlang, whereis, [Process]) of
        Pid when is_pid(Pid) ->
            true;
        undefined ->
            false;
        {badrpc, _} ->
            false
    end.
--- a/src/srvNodeMgr/reloader.erl
+++ b/src/srvNodeMgr/reloader.erl
@ -0,0 +1,165 @@
 %% @copyright 2007 Mochi Media, Inc.
 %% @author Matthew Dempsky <matthew@mochimedia.com>
 %%
 %% @doc Erlang module for automatically reloading modified modules
 %% during development.

 -module(reloader).
 -author("Matthew Dempsky <matthew@mochimedia.com>").

 -include_lib("kernel/include/file.hrl").

 -behaviour(gen_server).

 -export([start/0, start_link/0]).
 -export([stop/0]).
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).
 -export([all_changed/0]).
 -export([is_changed/1]).
 -export([reload_modules/1]).
 -export([reload_all/0]).

 -record(state, {last, tref}).

 %% External API

 %% @spec start() -> ServerRet
 %% @doc Start the reloader as an unlinked gen_server registered locally
 %% under the module name.
 start() ->
    ServerName = {local, ?MODULE},
    gen_server:start(ServerName, ?MODULE, [], []).

 %% @spec start_link() -> ServerRet
 %% @doc Start the reloader as a gen_server linked to the caller and
 %% registered locally under the module name.
 start_link() ->
    ServerName = {local, ?MODULE},
    gen_server:start_link(ServerName, ?MODULE, [], []).

 %% @spec stop() -> stopped
 %% @doc Stop the reloader. Sends a synchronous `stop' request; the server
 %% replies `stopped' (see handle_call/3) and terminates.
 stop() ->
    Request = stop,
    gen_server:call(?MODULE, Request).

 %% gen_server callbacks
 %% -define(RERODER_CHECK_TIME,  5000).

 %% @spec init([]) -> {ok, State}
 %% @doc gen_server init. The initial state records the current timestamp as
 %% the lower bound for the first reload scan. No timer is armed here; scans
 %% run when a `doit' cast arrives.
 init([]) ->
    InitialState = #state{last = stamp()},
    {ok, InitialState}.


 %% @spec handle_call(Args, From, State) -> tuple()
 %% @doc gen_server callback. `stop' replies `stopped' and shuts the server
 %% down; every other request is answered with {error, badrequest}.
 handle_call(Request, _From, State) ->
    case Request of
        stop ->
            {stop, shutdown, stopped, State};
        _ ->
            {reply, {error, badrequest}, State}
    end.

 %% @spec handle_cast(Cast, State) -> tuple()
 %% @doc gen_server callback. A `doit' cast reloads every loaded module whose
 %% beam file changed since the previous successful scan (see doit/2) and
 %% then advances the `last' timestamp. If the scan raises, the exception is
 %% logged with its stacktrace and the state is left untouched, so the same
 %% time window is scanned again on the next cast. Other casts are ignored.
 handle_cast(doit, State) ->
    error_logger:info_msg("reloader do reload ... ~n", []),
    Now = stamp(),
    try
        _ = doit(State#state.last, Now),
        error_logger:info_msg("reloader done ... ~n", []),
        {noreply, State#state{last = Now}}
    catch
        %% Bind the stacktrace in the catch pattern; erlang:get_stacktrace/0
        %% is deprecated and no longer available on current OTP releases.
        _:R:Stack ->
            error_logger:error_msg(
              "reload failed R:~w Stack:~p~n", [R, Stack]),
            %% reloader failed, no state update
            {noreply, State}
    end;
 handle_cast(_Req, State) ->
    {noreply, State}.

 %% @spec handle_info(Info, State) -> tuple()
 %% @doc gen_server callback. Every out-of-band message is dropped and the
 %% state is kept as is.
 handle_info(_Message, Unchanged) ->
    {noreply, Unchanged}.

 %% @spec terminate(Reason, State) -> ok
 %% @doc gen_server termination callback. There is nothing to clean up
 %% because the server holds no timer.
 terminate(_Why, _FinalState) ->
    ok.

 %% @spec code_change(_OldVsn, State, _Extra) -> {ok, State}
 %% @doc gen_server code_change callback (trivial): the state is carried over
 %% unchanged.
 code_change(_OldVsn, Current, _Extra) ->
    {ok, Current}.

 %% @spec reload_modules([atom()]) -> [{module, atom()} | {error, term()}]
 %% @doc For each module, in order: purge the old code with code:purge/1 and
 %%      load it again with code:load_file/1. Returns the code:load_file/1
 %%      results in the same order.
 reload_modules(Modules) ->
    PurgeAndLoad = fun(Mod) ->
                           code:purge(Mod),
                           code:load_file(Mod)
                   end,
    lists:map(PurgeAndLoad, Modules).

 %% @spec all_changed() -> [atom()]
 %% @doc Return the loaded modules that have a file path (a string) as their
 %% load location and for which is_changed/1 is true.
 all_changed() ->
    Pick = fun({Mod, Path}) when is_list(Path) ->
                   is_changed(Mod) andalso {true, Mod};
              (_) ->
                   false
           end,
    lists:filtermap(Pick, code:all_loaded()).

 %% @spec reload_all() -> ok
 %% @doc Ask the reloader server to run a reload scan. This is an
 %% asynchronous cast: it returns ok immediately and does not report which
 %% modules were reloaded.
 reload_all() ->
    Request = doit,
    gen_server:cast(?MODULE, Request).

 %% @spec is_changed(atom()) -> boolean()
 %% @doc true if the vsn attribute of the loaded module differs from the vsn
 %%      of its object code as returned by code:get_object_code/1. Any
 %%      failure while obtaining either version yields false.
 is_changed(M) ->
    try
        LoadedVsn = module_vsn(M:module_info()),
        ObjectVsn = module_vsn(code:get_object_code(M)),
        LoadedVsn =/= ObjectVsn
    catch
        _:_ ->
            false
    end.

 %% Internal API

 %% Extracts the vsn of a module from either
 %%   - a module_info() property list (via its `attributes' entry), or
 %%   - a {Module, Binary, Filename} triple as returned by
 %%     code:get_object_code/1 (via beam_lib:version/1).
 %% Crashes with badmatch when the version cannot be found.
 module_vsn(Info) when is_list(Info) ->
    {attributes, Attributes} = lists:keyfind(attributes, 1, Info),
    {vsn, Version} = lists:keyfind(vsn, 1, Attributes),
    Version;
 module_vsn({Mod, Binary, _Filename}) ->
    {ok, {Mod, Version}} = beam_lib:version(Binary),
    Version.

 %% Checks the file of every loaded module that has a file path and reloads
 %% those whose mtime lies in the half-open interval [From, To).
 %% Returns one result per inspected module: the value of reload/1,
 %% `unmodified', `gone' (file no longer exists) or `error'.
 doit(From, To) ->
    Inspect =
        fun(Module, Filename) ->
                case file:read_file_info(Filename) of
                    {ok, #file_info{mtime = Mtime}} when Mtime >= From, Mtime < To ->
                        reload(Module);
                    {ok, _} ->
                        unmodified;
                    {error, enoent} ->
                        %% The Erlang compiler deletes existing .beam files
                        %% when recompiling fails, so a missing file is not
                        %% reported as an error.
                        gone;
                    {error, Reason} ->
                        error_logger:error_msg("Error reading ~s's file info: ~p~n",
                                               [Filename, Reason]),
                        error
                end
        end,
    [Inspect(Module, Filename)
     || {Module, Filename} <- code:all_loaded(), is_list(Filename)].

 %% Purges the old code of Module and loads it again from disk, logging the
 %% outcome. Returns `reload' on success and `error' when loading fails.
 reload(Module) ->
    error_logger:info_msg("Reloading ~p ...", [Module]),
    code:purge(Module),
    LoadResult = code:load_file(Module),
    case LoadResult of
        {module, Module} ->
            error_logger:info_msg("reload ~w ok.~n", [Module]),
            reload;
        {error, Why} ->
            error_logger:error_msg("reload fail: ~p.~n", [Why]),
            error
    end.


 %% Current local date and time as {{Year, Month, Day}, {Hour, Min, Sec}};
 %% the same representation file_info mtimes are compared against in doit/2.
 stamp() ->
    LocalNow = erlang:localtime(),
    LocalNow.

--- a/src/srvNodeMgr/server_control_main.erl
+++ b/src/srvNodeMgr/server_control_main.erl
@ -0,0 +1,145 @@
 -module(server_control_main).

 -export([start/0]).

 -define(RPC_TIMEOUT, infinity).

 %% {Command, Description} pairs shown in the usage text.
 commands_desc() ->
    Commands = ["stop", "stop_all", "stop_app", "start_app", "cluster_status"],
    Descriptions = ["停止游戏服务器进程",
                    "停止游戏集群进程",
                    "关闭server application",
                    "打开server application",
                    "集群状态"],
    lists:zip(Commands, Descriptions).
 %% Option specification handed to getopt. The default for --node is the
 %% value stored under `nodename' in the process dictionary (set by start/0);
 %% throws not_nodename when that entry is missing.
 opt_spec_list() ->
    DefaultNode = get(nodename),
    DefaultNode =:= undefined andalso throw(not_nodename),
    HelpOpt = {help, $h, "help", undefined, "显示帮助，然后退出"},
    NodeOpt = {node, undefined, "node", {atom, DefaultNode}, "管理节点"},
    [HelpOpt, NodeOpt].
 %% Prints the getopt usage text for server_ctl, then calls err_misc:quit(1).
 usage() ->
    OptSpecs = opt_spec_list(),
    Commands = commands_desc(),
    getopt:usage(OptSpecs, "server_ctl", "<command> [<args>]", Commands),
    err_misc:quit(1).
 %% Parses the command line with getopt.
 %% Returns {ok, {Command, Opts, Args}} where Command is the first
 %% non-option argument converted to an atom, or `no_command' when there is
 %% no such argument or parsing fails (the parse error is printed).
 parse_arguments(CmdLine) ->
    Parsed = getopt:parse(opt_spec_list(), CmdLine),
    case Parsed of
        {ok, {Opts, [Command | Args]}} ->
            {ok, {list_to_atom(Command), Opts, Args}};
        {ok, {_Opts, []}} ->
            no_command;
        _ ->
            io:format("Error ~p~n", [Parsed]),
            no_command
    end.

 %% Entry point of the control script.
 %% Reads the -nodename init argument, stores it in the process dictionary
 %% (opt_spec_list/0 uses it as the default target), parses the plain
 %% arguments, pings the target node, runs the requested action and exits via
 %% quit/1 with status 0 on `ok' / `{ok, Info}' and 2 on anything else.
 start() ->
    {ok, [[NodeStr | _] | _]} = init:get_argument(nodename),
    put(nodename, list_to_atom(NodeStr)),
    PlainArgs = init:get_plain_arguments(),
    {Command, Opts, Args} =
        case parse_arguments(PlainArgs) of
            {ok, Parsed} ->
                Parsed;
            no_command ->
                usage()
        end,
    Target = proplists:get_value(node, Opts),
    net_adm:ping(Target),
    timer:sleep(1000), %% wait auto find node
    %% An old-style catch (rather than try/catch) is used on purpose:
    %% rpc:call turns thrown errors into normal return values, and every
    %% non-ok outcome is reported the same way below.
    Outcome = (catch action(Command, Target, Args, Opts)),
    ExitStatus =
        case Outcome of
            ok ->
                io:format("done.~n", []),
                0;
            {ok, Info} ->
                io:format("done (~p).~n", [Info]),
                0;
            _ ->
                io:format("other result ~p~n", [Outcome]),
                2
        end,
    quit(ExitStatus).

 %% Runs one control command against Node through rpc (see call/2).
 %%   Command - atom parsed from the command line
 %%   Node    - target node
 %%   Args    - remaining command-line arguments (only printed by the
 %%             fallback clause)
 %%   Opts    - parsed options (only printed by the fallback clause)
 %% Returns ok for most commands, the raw rpc result for `reload' / `stop',
 %% {badrpc, Reason} when the `info' rpc fails, and `invalid_command' for an
 %% unknown command. start/0 treats everything except ok | {ok, _} as failure.
 action(info, Node, _Args, _Opts) ->
  io:format("System info for Node ~p~n", [Node]),
  case call(Node, {main, get_info, []}) of
      {badrpc, _} = Error ->
          %% A failed rpc does not yield the argument list io:format/2
          %% needs; hand the error back so the caller reports it instead of
          %% crashing with badarg.
          Error;
      Res ->
          io:format( " ~n   Scheduler id:                         ~p
               ~n   Num scheduler:                        ~p
               ~n   Process count:                        ~p
               ~n   Process limit:                        ~p
               ~n   Memory used by erlang processes:      ~p
               ~n   Memory allocated by erlang processes: ~p
               ~n   The total amount of memory allocated: ~p
               ~n",
               Res),
          ok
  end;
 action(backup, Node, _Args, _Opts) ->
  case call(Node, {app_misc, backup, []}) of
      {error, Msg} ->
          io:format("~s~n", [Msg]);
      {ok, FileName} ->
         io:format("backup file:~s~n", [FileName]),
         io:format("backup file to remote ......~n", []),
         %% NOTE(review): FileName is spliced into a shell command line;
         %% confirm app_misc:backup/0 can only return shell-safe names.
         Result = os:cmd("bash copy_to_remote.sh " ++ FileName),
         io:format("~s~n", [Result])
  end,
  ok;

 action(pause_accept, Node, _Args, _Opts) ->
  io:format("Pause accept new client ~p~n", [Node]),
  call(Node, {app_misc, pause_accept, []}),
  ok;
 action(resume_accept, Node, _Args, _Opts) ->
  io:format("Resume accept new client ~p~n", [Node]),
  call(Node, {app_misc, resume_accept, []}),
  ok;
 action(accept_state, Node, _Args, _Opts) ->
  Res = call(Node, {app_misc, can_accept_new, []}),
  io:format("Node ~p accept state:~p~n ", [Node, Res]),
  ok;
 action(reload, Node, _Args, _Opts) ->
  io:format("Reloading node ~p~n", [Node]),
  call(Node, {main, reload, []});
 action(stop_all, MasterNode, _Args, _Opts) ->
    io:format("Stopping and halting all node~n", []),
    %% NOTE(review): main:stop_and_halt/1 is not in the export list of the
    %% main module added alongside this file - confirm it exists, otherwise
    %% the stop and stop_all commands fail with an undef badrpc.
    %% Stop every other connected node in parallel, one monitored helper
    %% process per node, and wait for all of them before stopping the master.
    PidMRefs = [{spawn_monitor(fun() ->
                                      call(Node, {main, stop_and_halt, [5]})
                              end), Node}
                || Node <- nodes() -- [MasterNode]],
    [receive
         {'DOWN', MRef, process, _, normal} ->
             ok;
         {'DOWN', MRef, process, _, Reason} ->
             io:format("Node ~p Error, Reason ~p~n", [Node, Reason])
     end || {{_Pid, MRef}, Node} <- PidMRefs],
    call(MasterNode, {main, stop_and_halt, [5]}),
    ok;
 action(stop, Node, _Args, _Opts) ->
    io:format("Stopping and halting node ~p~n", [Node]),
    call(Node, {main, stop_and_halt, [5]});
 action(Command, _Node, Args, Opts) ->
    io:format("Command: ~p Args: ~p Opts: ~p~n", [Command, Args, Opts]),
    invalid_command.

 %% Unpacks an {Module, Function, Args} triple and performs the rpc through
 %% rpc_call/4. Arguments are passed through unchanged.
 call(Node, MFA) ->
    {Mod, Fun, Args} = MFA,
    rpc_call(Node, Mod, Fun, Args).

 %% rpc:call/5 with the module-wide ?RPC_TIMEOUT.
 rpc_call(Node, Mod, Fun, Args) ->
    Timeout = ?RPC_TIMEOUT,
    rpc:call(Node, Mod, Fun, Args, Timeout).


 %% Terminates the emulator with exit status Status.
 %% On unix the VM is halted immediately. On win32 init:stop/1 is used and
 %% the calling process then blocks forever while the shutdown proceeds.
 quit(Status) ->
    {OsFamily, _OsName} = os:type(),
    case OsFamily of
        unix ->
            halt(Status);
        win32 ->
            init:stop(Status),
            receive
            after infinity ->
                    ok
            end
    end.
--- a/src/srvNodeMgr/u.erl
+++ b/src/srvNodeMgr/u.erl
@ -0,0 +1,151 @@
 %%----------------------------------------------------
 %% Erlang模块热更新到所有线路（包括server的回调函数，如果对state有影响时慎用）
 %%
 %% 检查：u:c()                %% 列出前5分钟内编译过的文件
 %%       u:c(N)               %% 列出前N分钟内编译过的文件
 %%
 %% 更新：u:u()                %% 更新前5分钟内编译过的文件               
 %%       u:u(N)               %% 更新前N分钟内编译过的文件   
 %%       u:u([mod_xx, ...])   %% 指定模块（不带后缀名）
 %%       u:u(m)               %% 编译并加载文件
 %%
 %% Tips: u - update, c - check
 %% 
 %% @author rolong@vip.qq.com
 %%----------------------------------------------------

 -module(u).
 -compile(export_all).
 -include_lib("kernel/include/file.hrl").

 %% @doc Check mode with the default window: lists the modules compiled
 %% during the last 5 minutes.
 c() ->
     c(5).

 %% @doc Lists (without loading) the modules whose .beam file in ../ebin was
 %% modified during the last N minutes.
 %%
 %% Accepts an integer number of minutes, or a one-element list holding an
 %% atom that tool:to_integer/1 turns into an integer. Any other argument
 %% only prints a Badarg line.
 c(Minutes) when is_integer(Minutes) ->
     check_recent(Minutes);
 c([Arg]) when is_atom(Arg) ->
     case tool:to_integer(tool:to_list(Arg)) of
         Minutes when is_integer(Minutes) ->
             %% Use the converted integer; multiplying the raw atom by 60
             %% (as the previous code did) raised badarith.
             check_recent(Minutes);
         Other ->
             info("ERROR======> Badarg ~p/~p ~n", [Arg, Other])
     end;
 c(Arg) ->
     info("ERROR======> Badarg ~p ~n", [Arg]).

 %% Changes into ../ebin and prints the modules modified within the last
 %% Minutes minutes, or the error returned by file:list_dir/1.
 check_recent(Minutes) ->
     c:cd("../ebin"),
     case file:list_dir(".") of
         {ok, FileList} ->
             Files = get_new_file(FileList, Minutes * 60),
             info("---------check modules---------~n~w~n=========check modules=========", [Files]);
         Any ->
             info("Error Dir: ~w", [Any])
     end.

 %% Starts a full rebuild-and-reload (u(m)) in a separate, unlinked process
 %% and returns ok immediately without waiting for it.
 admin() ->
     _Worker = spawn(fun() -> u(m) end),
     ok.

 %% Update mode with the default window of 5 minutes.
 u() ->
     u(5).

 %% u(m)       - runs make:all() one directory up, then reloads whatever was
 %%              rebuilt (window = measured build time; util:unixtime/0 is
 %%              presumably in seconds - confirm).
 %% u(Minutes) - reloads modules whose .beam in the current directory changed
 %%              within the last Minutes minutes (plus 3 seconds of slack).
 %% u(Modules) - reloads exactly the listed modules.
 %% Anything else only prints a Badarg line.
 %% Only the local node is reloaded; distribution to other nodes is disabled.
 u(m) ->
     BuildStart = util:unixtime(),
     info("----------makes----------", []),
     c:cd("../"),
     make:all(),
     c:cd("ebin"),
     Elapsed = util:unixtime() - BuildStart,
     info("Make Time : ~w s", [Elapsed]),
     u(Elapsed / 60);
 u(Minutes) when is_number(Minutes) ->
     case file:list_dir(".") of
         {ok, Names} ->
             load(get_new_file(Names, util:ceil(Minutes * 60) + 3));
         Failure ->
             info("Error Dir: ~w", [Failure])
     end;
 u(Modules) when is_list(Modules) ->
     load(Modules);
 u(_Other) ->
     info("ERROR======> Badarg", []).

 %% m(['src/data/*','src/lib/lib_goods.erl'])
 %% Compiles the given files/patterns from the parent directory using
 %% make:files/2 (debug_info, include dir "include", output dir "ebin"),
 %% prints how long it took, and returns the make:files/2 result.
 m(Files) when is_list(Files) ->
     Began = util:unixtime(),
     info("----------makes----------~n~w~n", [Files]),
     c:cd("../"),
     Outcome = make:files(Files, [debug_info, {i, "include"}, {outdir, "ebin"}]),
     c:cd("ebin"),
     Elapsed = util:unixtime() - Began,
     info("Make Time : ~w s", [Elapsed]),
     Outcome.

 %% Prints Format followed by a newline, with no format arguments.
 info(Format) ->
     info(Format, []).

 %% Prints Format (a string) with Args through io:format/2, appending a
 %% newline to the format string.
 info(Format, Args) ->
     Line = Format ++ "~n",
     io:format(Line, Args).

 %% 更新到所有线路,暂时处理单节点的情况
 %% loads([], _Files) -> ok;
 %% loads([H | T], Files) ->
 %%     info("[~w]", [H#t_server_node.node]),
 %%     rpc:cast(H#t_server_node.node, u, load, [Files]),
 %%     loads(T, Files).

 %% @doc Returns, as atoms, the module names of those entries of Files that
 %% are named "<name>.beam" and whose file (looked up relative to the current
 %% directory) was modified less than S seconds ago.
 %%
 %% Entries that are not .beam files, cannot be stat'ed, or carry a
 %% modification time in the future are skipped. The result is in reverse
 %% input order. Windows longer than one day are honoured (the previous
 %% implementation silently dropped anything older than 24 hours).
 get_new_file(Files, S) ->
     get_new_file(Files, S, []).

 get_new_file([], _S, Result) ->
     Result;
 get_new_file([H | T], S, Result) ->
     NewResult =
         case recent_beam_module(H, S) of
             {ok, Module} -> [Module | Result];
             skip -> Result
         end,
     get_new_file(T, S, NewResult).

 %% Classifies one directory entry: {ok, Module} when it is a .beam file
 %% modified within the last S seconds, otherwise skip.
 recent_beam_module(Name, S) ->
     case string:tokens(Name, ".") of
         [Base, "beam"] ->
             case beam_age_seconds(Name) of
                 {ok, Age} when Age >= 0, Age < S -> {ok, list_to_atom(Base)};
                 _ -> skip
             end;
         _ ->
             skip
     end.

 %% Seconds elapsed between the file's mtime and now (both local time), or
 %% the error returned by file:read_file_info/1.
 beam_age_seconds(Name) ->
     case file:read_file_info(Name) of
         {ok, #file_info{mtime = Mtime}} ->
             Now = calendar:datetime_to_gregorian_seconds(calendar:local_time()),
             {ok, Now - calendar:datetime_to_gregorian_seconds(Mtime)};
         {error, _} = Error ->
             Error
     end.

 %% @doc Purges and reloads each module in the list through c:l/1, printing
 %% one line per module. A failed load is reported as an error line instead
 %% of being announced as "loaded"; processing continues with the remaining
 %% modules either way. Always returns ok.
 load([]) -> ok;
 load([FileName | T]) ->
     case c:l(FileName) of
         {module, _} ->
             info("loaded: ~w", [FileName]);
         {error, What} ->
             info("ERROR======> loading: ~w (~w)", [FileName, What])
     end,
     load(T).
 %    case code:soft_purge(FileName) of
 %        true ->
 %            case code:load_file(FileName) of
 %                {module, _} ->
 %                    info("loaded: ~w", [FileName]),
 %                    ok;
 %                    %% info("loaded: ~w", [FileName]);
 %                {error, What} -> info("ERROR======> loading: ~w (~w)", [FileName, What])
 %            end;
 %        false -> info("ERROR======> Processes lingering : ~w [zone ~w] ", [FileName, srv_kernel:zone_id()])
 %    end,
 %    load(T).


 %% Convenience entry point: first prints the recently compiled modules
 %% (c/0), then reloads them (u/0). Returns the result of u/0.
 a() ->
     _ = c(),
     u().
--- a/src/srvNodeMgr/utAppStart.erl
+++ b/src/srvNodeMgr/utAppStart.erl
--- a/src/srvNodeMgr/utSrvManager.erl
+++ b/src/srvNodeMgr/utSrvManager.erl
--- a/src/srvNodeMgr/utStopSrv1.escript
+++ b/src/srvNodeMgr/utStopSrv1.escript
--- a/src/srvNodeMgr/utStopSrv2.escript
+++ b/src/srvNodeMgr/utStopSrv2.escript
--- a/src/testCase/utTestDS.erl
+++ b/src/testCase/utTestDS.erl
@ -0,0 +1,691 @@
 -module(utTestDS).
 -compile([export_all, nowarn_unused_function, nowarn_unused_vars, nowarn_export_all]).

 简单介绍一下Erlang常用数据结构的内部实现和特性，主要参考Erlang OTP 18.0源码，和网上很多优秀博客(参见附录)，整理了一些自己项目中常用到的。

 Erlang虚拟机使用一个字(64/32位)来表示所有类型的数据，即Eterm。具体的实施方案通过占用Eterm的后几位作为类型标签，然后根据标签类型来解释剩余位的用途。这个标签是多层级的，最外层占用两位，有三种类型：

 list，剩下62位是指向列表Cons的指针
 boxed对象，即复杂对象，剩余62位指向boxed对象的对象头。包括元组，大整数，外部Pid/Port等
 immediate立即数，即可以在一个字中表示的小型对象，包括小整数，本地Pid/Port，Atom，NIL等
 这三种类型是Erlang类型的大框架，前两者是可以看做是引用类型，立即数相当于是值类型，但无论对于哪种类型，Erlang Eterm本身只占用一个字，理解这一点是很重要的。

 对于二三级标签的细分和编码，一般我们无需知道这些具体的底层细节，以下是几种常用的数据结构实现方式。

 一. 常用类型
 1. atom
 atom用立即数表示，在Eterm中保存的是atom在全局atom表中的索引，依赖于高效的哈希和索引表，Erlang的atom比较和匹配像整数一样高效。atom表是不回收的，并且默认最大值为1024*1024，超过这个限制Erlang虚拟机将会崩溃，可通过+t参数调整该上限。

 2.Pid/Port
 /*  erts/emulator/beam/erl_term.h

 *
 *  Old pid layout(R9B及之前):
 *
 *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *   |s s s|n n n n n n n n n n n n n n n|N N N N N N N N|c c|0 0|1 1|
 *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 *  s : serial  每次n到达2^15之后 自增一次 然后n重新从低位开始
 *  n : number  15位, 进程在本地进程表中的索引
 *  c : creation 每次节点重启，该位自增一次
 *  N : node number 节点名字在atom表中索引
 *
 *
 *  PID layout (internal pids):
 *
 *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *   |n n n n n n n n n n n n n n n n n n n n n n n n n n n n|0 0|1 1|
 *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 *  n : number 28位进程Pid
 */
 在Old Pid表示中(R9B及之前版本)，在32位中表示了整个Pid，包括其节点名字等信息，也就是本地进程和外部进程都可以用Eterm立即数表示，显示格式为<N, n, s>。

 在R9B之后，随着进程数量增加和其它因素，Pid只在32位中表示本地Pid(A=0)，将32位中除了4位Tag之外的28位，都可用于进程Pid表示，出于Pid表示的历史原因，仍然保留三段式的显示，本地Pid表示变成了<0, Pid低15位, Pid高13位>。对于外部Pid，采用boxed复合对象表示，在将本地Pid发往其它node时，Erlang会自动为Pid加上本地节点信息，并打包为一个boxed对象，占用6个字。另外，Erlang需要维护Pid表，每个条目占8个字节，当进程数量过大时，Pid表将占用大量内存，Erlang默认可以使用18位有效位来表示Pid(262144)，可通过+P参数调节，最大值为27位(2^27-1)，此时Pid表占用内存为2G。

 Eshell V8.1  (abort with ^G)
 (n1@T4F-MBP-11)1> node().
 'n1@T4F-MBP-11'
 % 节点名的二进制表示
 (n1@T4F-MBP-11)2> term_to_binary(node()).
 <<131,100,0,13,110,49,64,84,52,70,45,77,66,80,45,49,49>>
 (n1@T4F-MBP-11)3> self().
 <0.63.0>
 % term_to_binary会将A对应的节点名编码进去
 (n1@T4F-MBP-11)4> term_to_binary(self()).
 <<131,103,100,0,13,110,49,64,84,52,70,45,77,66,80,45,49,
 49,0,0,0,63,0,0,0,0,2>>
 (n1@T4F-MBP-11)5>
 3. lists
 列表以标签01标识，剩余62位指向列表的Cons单元，Cons是[Head|Tail]的组合，在内存中体现为两个相邻的Eterm，Head可以是任何类型的Eterm，Tail是列表类型的Eterm。因此形如L2 = [Elem|L1]的操作，实际上构造了一个新的Cons，其中Head是Elem Eterm，Tail是L1 Eterm，然后将L2的Eterm指向了这个新的Cons，因此L2即代表了这个新的列表。对于[Elem|L2] = L1，实际上是提出了L1 Eterm指向的Cons，将Head部分赋给Elem，Tail部分赋给L2，注意Tail本身就是个List的Eterm，因此list是单向列表，并且构造和提取操作是很高效的。需要再次注意的是，Erlang所有类型的Eterm本身只占用一个字大小。这也是诸如list,tuple能够容纳任意类型的基础。

 Erlang中进程内对对象的重复引用只需占用一份对象内存(只是Eterm本身一个字的拷贝)，但是在对象跨进程时，对象会被展开，执行深度拷贝：

 Eshell V7.0.2  (abort with ^G)
 1> L1 = [1,2,3].
 [1,2,3]
 2> erts_debug:size(L1).
 6
 3> L2 = [L1,L1,L1].
 [[1,2,3],[1,2,3],[1,2,3]]
 4> erts_debug:size(L2).		  % 获得L2对象树的大小 3*2+6
 12
 5> erts_debug:flat_size(L2). 	% 获得对象平坦展开后的大小 3*(2+6)
 24
 6> P1 = spawn(fun() -> receive L -> io:format("~p~n",[erts_debug:size(L)]) end end).
 <0.45.0>
 7> P1 ! L2.					  % 在跨进程时，对象被展开 执行深度拷贝
 24
 [[1,2,3],[1,2,3],[1,2,3]]
 此时L1, L2的内存布局如下：
 在这里插入图片描述

 4. tuple
 tuple属于boxed对象的一种，每个boxed对象都有一个对象头(header)，boxed Eterm即指向这个header，这个header里面包含具体的boxed对象类型，如tuple的header末6位为000000，前面的位数为tuple的size：
 在这里插入图片描述
 tuple实际上就是一个有头部的数组，其包含的Eterm在内存中紧凑排列，tuple的操作效率和数组是一致的。

 list和tuple是erlang中用得最多的数据结构，也是其它一些数据结构的基础，如record，map，摘下几个关于list，tuple操作的常用函数，便于加深对结构的理解：

 // 位于 $OTP_SRC/erts/emulator/beam/bif.c
 BIF_RETTYPE tuple_to_list_1(BIF_ALIST_1)
 {
 Uint n;
 Eterm *tupleptr;
 Eterm list = NIL;
 Eterm* hp;

 if (is_not_tuple(BIF_ARG_1))  {
 BIF_ERROR(BIF_P, BADARG);
 }

 // 得到tuple Eterm所指向的tuple对象头
 tupleptr = tuple_val(BIF_ARG_1);
 // 得到对象头中的tuple size
 n = arityval(*tupleptr);
 hp = HAlloc(BIF_P, 2 * n);
 tupleptr++;

 // 倒序遍历 因为list CONS的构造是倒序的
 while(n--) {
 // 相当于hp[0]=tupleptr[n]; hp[1] = list; list = make_list(hp);
 // 最后返回的是指向hp的list Eterm
 list = CONS(hp, tupleptr[n], list);
 hp += 2;
 }
 BIF_RET(list);
 }

 BIF_RETTYPE list_to_tuple_1(BIF_ALIST_1)
 {
 Eterm list = BIF_ARG_1;
 Eterm* cons;
 Eterm res;
 Eterm* hp;
 int len;

 if ((len = erts_list_length(list)) < 0 || len > 		ERTS_MAX_TUPLE_SIZE) {
 BIF_ERROR(BIF_P, BADARG);
 }
 // 元素个数 + 对象头
 hp = HAlloc(BIF_P, len+1);
 res = make_tuple(hp);
 *hp++ = make_arityval(len);
 while(is_list(list)) {
 cons = list_val(list);
 *hp++ = CAR(cons);
 list = CDR(cons);
 }
 BIF_RET(res);
 }
 可以看到，list，tuple中添加元素，实际上都是在拷贝Eterm本身，Erlang虚拟机会追踪这些引用，并负责垃圾回收。

 5. binary
 Erlang binary用于处理字节块，Erlang其它的数据结构(list,tuple,record)都是以Eterm为单位的，用于处理字节块会浪费大量内存，如”abc”占用了7个字(加上ETerm本身)，binary为字节流提供一种操作高效，占用空间少的解决方案。

 之前我们介绍的数据结构都存放在Erlang进程堆上，进程内部可以使用对象引用，在对象跨进程传输时，会执行对象拷贝。为了避免大binary跨进程传输时的拷贝开销，Erlang针对binary作出了优化，将binary分为小binary和大binary。

 heap binary
 小于64字节(定义于erl_binary.h ERL_ONHEAP_BIN_LIMIT宏)的小binary直接创建在进程堆上，称为heap binary，heap binary是一个boxed对象：

 typedef struct erl_heap_bin {
 Eterm thing_word;		/* Subtag HEAP_BINARY_SUBTAG. */
 Uint size;				/* Binary size in bytes. */
 Eterm data[1];			/* The data in the binary. */
 } ErlHeapBin;
 refc binary
 大于64字节的binary将创建在Erlang虚拟机全局堆上，称为refc binary(reference-counted binary)，可被所有Erlang进程共享，这样跨进程传输只需传输引用即可，虚拟机会对binary本身进行引用计数追踪，以便GC。refc binary需要两个部分来描述，位于全局堆的refc binary数据本身和位于进程堆的binary引用(称作proc binary)，这两种数据结构定义于global.h中。下图描述refc binary和proc binary的关系：
 在这里插入图片描述
 所有的OffHeap(进程堆之外的数据)被组织为一个单向链表，进程控制块(erl_process.h struct process)中的off_heap字段维护链表头和所有OffHeap对象的总大小，当这个大小超过虚拟机阀值时，将导致一次强制GC。注意，refc binary只是OffHeap对象的一种，以后可扩展其它种类。

 sub binary
 sub binary是Erlang为了优化binary分割(如split_binary/2)而引入的。由于Erlang变量不可变语义，拷贝分割后的binary是效率比较低下的做法，Erlang通过sub binary来复用原有binary。ErlSubBin定义于erl_binary.h，下图描述split_binary(ProBin, size1)返回一个ErlSubBin二元组的过程：
 在这里插入图片描述
 ProBin的size可能小于refc binary的size，如上图中的size3，这是因为refc binary通常会通过预分配空间的方式进行优化。

 要注意的是，sub binary只引用proc binary(通过orig)，而不直接引用refc binary，因此图中refc binary的refc字段仍然为1。只要sub binary还有效，对应的proc binary便不会被GC，refc binary的计数也就不为0。

 bit string
 当我们通过如<<2:3,3:6>>的位语法构建binary时，将得到<<65,1:1>>这种非字节对齐的数据，即二进制流，在Erlang中被称为bitstring，Erlang的bitstring基于ErlSubBin结构实现，此时bitsize为最后一个字节的有效位数，size为有效字节数(不包括未填满的最后一个字节)，对虚拟机底层来说，sub binary和bit string是同一种数据结构。

 binary追加构造优化
 在通过C = <<A/binary,B/binary>>追加构造binary时，最自然的做法应当是创建足够空间的C(heap or refc)，再将A和B的数据拷贝进去，但Erlang对binary的优化不止于此，它使用refc binary的预留空间，通过追加的方式提高大binary和频繁追加的效率。

 Bin0 = <<0>>,                    %% 创建一个heap binary Bin0
 Bin1 = <<Bin0/binary,1,2,3>>,    %% 追加目标不是refc binary，创建一个refc binary，预留256字节空间，用Bin0初始化，并追加1,2,3
 Bin2 = <<Bin1/binary,4,5,6>>,    %% 追加目标为refc binary且有预留空间 直接追加4,5,6
 Bin3 = <<Bin2/binary,7,8,9>>,    %% 同样，将7,8,9追加refc binary预留空间
 Bin4 = <<Bin1/binary,17>>,       %% 此时不能直接追加，否则会覆盖Bin2内容，虚拟机会通过某种机制发现这一点，然后将Bin1拷贝到新的refc binary，再执行追加
 {Bin4,Bin3}

 % 通过erts_debug:get_internal_state/1可以获取binary状态
 % 对应函数源码位于$BEAM_SRC/erl_bif_info.c erts_debug_get_internal_state_1
 %% Demo entry point: enables the erts_debug internal-state interface and
 %% then runs f2/1 on a one-byte binary.
 f() ->
     %% Opens the internal-state query interface (per the original note this
     %% only needs to be done once per process).
     erts_debug:set_internal_state(available_internal_state, true),
     Seed = <<0>>,
     %% Seed is passed as an argument so that, per the original note, the VM
     %% does not optimise B1 in f2/1 into a directly constructed heap binary.
     f2(Seed).

 %% Prints the erts_debug binary_info of B0 and of two binaries grown from it
 %% by successive appends, then returns ok.
 f2(B0) ->
     Info0 = erts_debug:get_internal_state({binary_info, B0}),
     io:format("B0: ~p~n", [Info0]),
     B1 = <<B0/binary, 1, 2, 3>>,
     Info1 = erts_debug:get_internal_state({binary_info, B1}),
     io:format("B1: ~p~n", [Info1]),
     B2 = <<B1/binary, 4, 5, 6>>,
     Info2 = erts_debug:get_internal_state({binary_info, B2}),
     io:format("B2: ~p~n", [Info2]),
     ok.

 % get_internal_state({binary_info, B})返回格式:
 % proc binary：{refc_binary, pb_size, {binary, orig_size}, pb_flags}
 % heap binary：heap_binary
 B0: heap_binary
 B1: {refc_binary,4,{binary,256},3}
 B2: {refc_binary,7,{binary,256},3}
 binary追加实现源码位于$BEAM_SRC/erl_bits.c erts_bs_append，B1和B2本身是sub binary，基于同一个ProcBin，可追加的refc binary只能被一个ProcBin引用，这是因为可追加refc binary可能会在追加过程中重新分配空间，此时要更新ProcBin引用，而refc binary无法快速追踪到其所有ProcBin引用(只能遍历)，另外，多个ProcBin上的sub binary可能对refc binary覆写。

 只有最后追加得到的sub binary才可执行快速追加(通过sub binary和对应ProBin flags来判定)，否则会拷贝并分配新的可追加refc binary。所有的sub binary都是指向ProcBin或heap binary的，不会指向sub binary本身。
 在这里插入图片描述

 binary降级
 Erlang通过追加优化构造出的可追加refc binary通过空间换取了效率，并且这类refc binary只能被一个proc binary引用(多个proc binary上的sub binary会造成覆写，注意，前面的B1，B2是sub binary而不是ProBin)。比如在跨进程传输时，原本只需拷贝ProBin，但对可追加的refc binary来说，不能直接拷贝ProBin，这时需对binary降级，即将可追加refc binary降级为普通refc binary：

 %% Demonstrates what sending an appendable binary to another process does to
 %% it: prints the binary_info of the appended binary before and after the
 %% send, appends once more, prints the info of the result and returns it.
 bs_emasculate(Bin0) ->
     Appended = <<Bin0/binary, 1, 2, 3>>,
     Receiver = spawn(fun() -> receive _ -> ok end end),
     Before = erts_debug:get_internal_state({binary_info, Appended}),
     io:format("Bin1 info: ~p~n", [Before]),
     Receiver ! Appended,
     After = erts_debug:get_internal_state({binary_info, Appended}),
     io:format("Bin1 info: ~p~n", [After]),
     %% Per the accompanying notes, the first binary was shrunk by the send,
     %% so this append copies into a fresh refc binary.
     Result = <<Appended/binary, 4, 5, 6>>,
     Final = erts_debug:get_internal_state({binary_info, Result}),
     io:format("Bin2 info: ~p~n", [Final]),
     Result.

 % 运行结果
 117> bs_emasculate(<<0>>).
 Bin1 info: {refc_binary,4,{binary,256},3}
 Bin1 info: {refc_binary,4,{binary,4},0}
 Bin2 info: {refc_binary,7,{binary,256},3}
 <<0,1,2,3,4,5,6>>
 降级操作会重新创建一个普通的refc binary(原有可追加refc binary会被GC?)，同时，降级操作会将B1的flags置0，这保证基于B1的sub binary在执行追加时，会重新拷贝分配refc binary。

 // 降级函数($BEAM_SRC/erl_bits.c)
 void erts_emasculate_writable_binary(ProcBin* pb)
 {
 Binary* binp;
 Uint unused;

 pb->flags = 0;
 binp = pb->val;
 ASSERT(binp->orig_size >= pb->size);
 unused = binp->orig_size - pb->size;
 /* Our allocators are 8 byte aligned, i.e., shrinking with
 less than 8 bytes will have no real effect */
 if (unused >= 8) {
 // 根据ProBin中的有效字节数，重新创建一个不可追加的refc binary
 binp = erts_bin_realloc(binp, pb->size);
 pb->val = binp;
 pb->bytes = (byte *) binp->orig_bytes;
 }
 }
 Q: ProcBin B1的字段被更新了，那么Erlang上层如何维护变量不可变语义?

 A: 变量不可变指的是Erlang虚拟机上层通过底层屏蔽后所能看到的不变语义，而不是变量底层实现(诸如Pid打包，maps hash扩展等，通过底层差异化处理后，对上层体现的语义和接口都没变，因此我们将其理解为“变量不可变”)。

 另外，全局堆GC也可能会对可追加refc binary的预留空间进行收缩(shrink)，可参考$BEAM_SRC/erl_gc.c sweep_off_heap函数。

 以上都是理论的实现，实际上Erlang虚拟机对二进制还做了一些基于上下文的优化，通过bin_opt_info编译选项可以打印出这些优化。关于binary优化的更多细节，参考Constructing and Matching Binaries。

 二. 复合类型
 基于list和tuple之上，Erlang还提供了一些其它的数据结构，这里列举几个key/value相关的数据结构，在服务器中会经常用到。

 1. record
 这个类型无需过多介绍，它就是一个tuple，所谓record field在预编译后实际上都是通过数值下标来索引，因此它访问field是O(1)复杂度的。

 2. map
 虽然record的语法糖让我们在使用tuple时便利了不少，但是比起真正的key/value结构仍然有许多限制，如key只能是原子，key不能动态添加或删除，record变动对热更的支持很差等。proplists能够一定程度地解决这种问题，但是它适合键值少的情况，通常用来做选项配置，并且不能保证key的唯一。

 map是OTP 17引进的数据结构，是一个boxed对象，它支持任意类型的Key，模式匹配，动态增删Key等，并且最新的mongodb-erlang直接支持map。

 在OTP17中，map的内存结构为：

 //位于 $OTP_SRC/erts/emulator/beam/erl_map.h
 typedef struct map_s {
 Eterm thing_word;	// 	boxed对象header
 Uint  size;			// 	map 键值对个数
 Eterm keys;      	// 	keys的tuple
 } map_t;
 该结构体之后就是依次存放的Value，因此maps的get操作，需要先遍历keys tuple，找到key所在下标，然后在value中取出该下标偏移对应的值。因此是O(n)复杂度的。详见maps:get源码($BEAM_SRC/erl_map.c erts_maps_get)。

 如此的maps，只能作为record的替用，并不是真正的Key->Value映射，因此不能存放大量数据。而在OTP18中，maps加入了针对于big map的hash机制，当maps:size < MAP_SMALL_MAP_LIMIT时，使用flatmap结构，也就是上述OTP17中的结构，当maps:size >= MAP_SMALL_MAP_LIMIT时，将自动使用hashmap结构来高效存取数据。MAP_SMALL_MAP_LIMIT在erl_map.h中默认定义为32。

 仍然要注意Erlang本身的变量不可变原则，每次执行更新maps，都会导致新开辟一个maps，并且拷贝原maps的keys和values，在这一点上，maps:update比maps:put更高效，因为前者keys数量不会变，因此无需开辟新的keys tuple，拷贝keys tuples ETerm即可。实际使用maps时：

 更新已有key值时，使用update(:=)而不是put(=>)，不仅可以检错，并且效率更高
 当key/value对太多时，对其进行层级划分，保证其拷贝效率
 实际测试中，OTP18中的maps在存取大量数据时，效率还是比较高的，这里有一份maps和dict的简单测试函数，可通过OTP17和OTP18分别运行来查看效率区别。通常情况下，我们应当优先使用maps，比起dict，它在模式匹配，mongodb支持，可读性上都有很大优势。

 3. array
 Erlang有个叫array的结构，其名字容易给人误解，它有如下特性：

 array下标从0开始
 array有两种模式，一种固定大小，另一种按需自动增长大小，但不会自动收缩
 支持稀疏存储，执行array:set(100,value,array:new())，那么[0,99]都会被设置为默认值(undefined)，该默认值可修改。
 在实现上，array最外层被包装为一个record:
 -record(array, {
 size :: non_neg_integer(),	%% number of defined entries
 max  :: non_neg_integer(),	%% maximum number of entries
 default,	%% the default value (usually 'undefined')
 elements :: elements(_)     %% the tuple tree
 }).
 elements是一个tuple tree，即用tuple包含tuple的方式组成的树，叶子节点就是元素值，元素默认以10个为一组，亦即完全展开的情况下，是一棵十叉树。但是对于没有赋值的节点，array用其叶子节点数量代替，并不展开：

 Eshell V7.0.2  (abort with ^G)
 1> array:set(9,value,array:new()).
 {array,10,10,undefined, % 全部展开
 {undefined,undefined,undefined,undefined,undefined,
 undefined,undefined,undefined,undefined,value}}

 % 只展开了19所在的子树 其它9个节点未展开
 % 注意tuple一共有11个元素，最后一个元素代表本层节点的基数，这主要是出于效率考虑，能够快速检索到元素所在子节点
 2> array:set(19,value,array:new()).
 {array,20,100,undefined,
 {10,
 {undefined,undefined,undefined,undefined,undefined,undefined,undefined,undefined,undefined,value},
 10,10,10,10,10,10,10,10,10}}

 % 逐级展开了199所在的子树
 3> array:set(199,value,array:new()).
 {array,200,1000,undefined,
 {100,
 {10,10,10,10,10,10,10,10,10,
 {undefined,undefined,undefined,undefined,undefined,
 undefined,undefined,undefined,undefined,value},
 10},
 100,100,100,100,100,100,100,100,100}}
 由于完全展开的tuple tree是一棵完全十叉树，因此实际上array的自动扩容也是以10为基数的。在根据Index查找元素时，通过div/rem逐级算出Index所属节点:

 %% 位于$OTP_SRC/lib/stdlib/src/array.erl
 get(I, #array{size = N, max = M, elements = E, default = D})
 when is_integer(I), I >= 0 ->
 if I < N ->		% 有效下标
 get_1(I, E, D);
 M > 0 ->		% I>=N 并且 array处于自动扩容模式 直接返回DefaultValue
 D;
 true ->		% I>=N 并且 array为固定大小  返回badarg
 erlang:error(badarg)
 end;
 get(_I, _A) ->
 erlang:error(badarg).

 %% The use of NODEPATTERN(S) to select the right clause is just a hack,
 %% but it is the only way to get the maximum speed out of this loop
 %% (using the Beam compiler in OTP 11).

 % -define(NODEPATTERN(S), {_,_,_,_,_,_,_,_,_,_,S}). % NODESIZE+1 elements!
 get_1(I, E=?NODEPATTERN(S), D) ->		% 到达已展开的中间节点 向下递归
   get_1(I rem S, element(I div S + 1, E), D);
 get_1(_I, E, D) when is_integer(E) ->	% 到达未展开的中间节点 返回默认值
   D;
 get_1(I, E, _D) ->						% 到达叶子节点层
   element(I+1, E).

 set(I, Value, #array{size = N, max = M, default = D, elements = E}=A)
   when is_integer(I), I >= 0 ->
   if I < N ->
      A#array{elements = set_1(I, E, Value, D)};
      I < M ->		% 更新size, size的主要作用是让读取更加高效
         %% (note that this cannot happen if M == 0, since N >= 0)
         A#array{size = I+1, elements = set_1(I, E, Value, D)};
      M > 0 ->		% 自动扩容
         {E1, M1} = grow(I, E, M),
         A#array{size = I+1, max = M1,
            elements = set_1(I, E1, Value, D)};
      true ->
         erlang:error(badarg)
   end;
 set(_I, _V, _A) ->
   erlang:error(badarg).

 %% See get_1/3 for details about switching and the NODEPATTERN macro.

 set_1(I, E=?NODEPATTERN(S), X, D) ->		% 所在节点已展开，向下递归
   I1 = I div S + 1,
   setelement(I1, E, set_1(I rem S, element(I1, E), X, D));
 set_1(I, E, X, D) when is_integer(E) ->	% 所在节点未被展开，递归展开节点 并赋值
   expand(I, E, X, D);
 set_1(I, E, X, _D) ->						% 到达叶子节点
   setelement(I+1, E, X).
 更多细节可以参见源码，了解了这些之后，再来看看Erlang array和其它语言数组不一样的地方：

 索引不是O(1)复杂度，而是O(log10n)

 array并不自动收缩
 array中的max和size字段，和array具体占用内存没多大关系(节点默认未展开)
 array中并没有subarray之类的操作，因为它根本不是线性存储的，而是树形的，因此如果用它来做递归倒序遍历之类的操作，复杂度不是O(n)，而是O(n*log10n)
 array中对于没有赋值的元素，给予默认值undefined，这个默认值可以在array:new()中更改，对使用者来说，明确赋值undefined和默认值undefined并无多大区别，但对array内部来说，可能会导致节点展开。

 6.2  Is there a collection of data structures, e.g. balanced trees?
 链表和元组一样，是Erlang的基本组成部分。其他标准数据结构是：

 Module	Description
 sets	   sets, a collection of unique elements.
 gb_sets	sets, but based on a general balanced data structure
 gb_trees	a general balanced tree
 dict	   maps, also called associative arrays
 queue	   double-ended queues
 ets	   hash tables and ordered sets (trees), stored outside the process
 dets	   on-disk hash tables
 表6.1：  标准数据结构
 (请注意：不常用的模块ordsets和orddict只是有序列表，因此对于诸如插入之类的常见操作具有O(n)复杂度)
 % Suggestion：
 % elements count: 0 - 100 | 100 - 10000  |  10000 -
 % our select   :  list   |      ets     |  gb_trees

 %% 用于测试erlang各种数据结构 读写遍历等操作的效率
 %%  lists ，maps 和record是erlang最为常用的数据结构，lists使用方便简单，maps则查询高效，record则需要预定义，
 %% 可扩展性差，各有各的优。本文做一下lists和maps的性能对比（再对比一下dict），代码如下（record操作不便则不做比较）。

 %%通过注释部分代码做以下测试
 %%timer:tc(lib_test, test_struct, [10000,#{}]).
 %%timer:tc(lib_test, test_struct, [10000,[]]).
 %% @doc Micro-benchmark body for comparing lists and maps.
 %%
 %% Starting from an empty container R ([] or #{}), performs Num rounds of
 %% "insert {Num, Num} and look up key 5000", then traverses the resulting
 %% container once and returns ok. The container type is chosen from R
 %% itself, so both variants can be timed without commenting code in and
 %% out (the previous body bound Fun and NewR twice and always crashed).
 test_struct(0, R) when is_list(R) ->
     %% traversal test, list variant
     lists:foreach(fun({K, V}) -> _ = K + 1, V + 1 end, R),
     ok;
 test_struct(0, R) when is_map(R) ->
     %% traversal test, map variant
     _ = maps:map(fun(K, V) -> _ = K + 1, V + 1 end, R),
     ok;
 test_struct(Num, R) when is_integer(Num), Num > 0, is_list(R) ->
     %% insert + lookup test, list variant
     NewR = [{Num, Num} | R],
     _ = lists:keyfind(5000, 1, NewR),
     test_struct(Num - 1, NewR);
 test_struct(Num, R) when is_integer(Num), Num > 0, is_map(R) ->
     %% insert + lookup test, map variant
     NewR = R#{Num => Num},
     _ = maps:get(5000, NewR, 0),
     test_struct(Num - 1, NewR).
 %% 做10000次的插入查询测试结果：
 %%
 %%     lists 50736微秒
 %%     maps 4670微秒
 %%     dict 60236微秒
 %% 做10000次的遍历结果：
 %%
 %%     lists 523微秒
 %%     maps 8337微秒
 %%     dict 4426微秒
 %% 对比总结：
 %%
 %%     对比测试数据maps在查询性能上比lists高10倍以上， 而在遍历上lists则更优。对于频繁插入和查询的数据，maps是最佳的选择，
 %%     lists则适用于广播列表之类需要遍历的数据。除此之外，个人觉得在使用上lists 比maps更为方便，因为lists模块提供了大量实用的函数，
 %%     单单在排序上，maps的实用性就不如lists了，所以在数据结构选择上就需要多多斟酌。另外record在查询上使用的是模式匹配，性能只会更高，
 %%     但需要提前定义字段，可扩展性差，在热更这块有不少坑，maps也可以用模式匹配查询，但也要确保key值存在，不然就nomatch，
 %%     但整体上maps更优于record，故建议用maps替代record。
--- a/src/testCase/utTestMd5.erl
+++ b/src/testCase/utTestMd5.erl
@ -100,6 +100,21 @@ tt7(N, Hex) ->
   String,
   tt7(N - 1, Hex).

 %% Calls the zero-arity function ?MODULE:Fun N+1 times in a row and returns
 %% the result of the last call.
 u4(N, Fun) ->
     Result = ?MODULE:Fun(),
     case N of
         0 -> Result;
         _ -> u4(N - 1, Fun)
     end.

 %% Builds a 16-byte identifier: the MD5 digest of the external term format
 %% of {nanosecond system time, random integer in 1..134217727, fresh ref}.
 uuid() ->
     Seed = {erlang:system_time(nanosecond), rand:uniform(134217727), make_ref()},
     Encoded = term_to_binary(Seed),
     erlang:md5(Encoded).
 %% Same seed as uuid/0 but without hashing: returns the external term format
 %% of {nanosecond system time, random integer in 1..134217727, fresh ref}.
 uuid2() ->
     Seed = {erlang:system_time(nanosecond), rand:uniform(134217727), make_ref()},
     term_to_binary(Seed).

 %% Returns 8 cryptographically strong random bytes followed by the external
 %% term format of the current {MegaSecs, Secs, MicroSecs} timestamp.
 %% erlang:timestamp/0 replaces the deprecated erlang:now/0; it yields the
 %% same tuple shape, and uniqueness still comes from the random prefix.
 get_uuid() ->
     Random = crypto:strong_rand_bytes(8),
     Stamp = erlang:term_to_binary(erlang:timestamp()),
     <<Random/bytes, Stamp/bytes>>.

 u1(0) ->
   crypto:strong_rand_bytes(16);
 u1(N) ->
--- a/src/testCase/utTestPerformance.erl
+++ b/src/testCase/utTestPerformance.erl
@ -177,3 +177,49 @@ st3() ->

 %% Returns size/1 of a fixed sample binary.
 st4() ->
     Sample = <<"fdfdfdd:fdffd:\rn\n:fdfd fd df df dfddfdf">>,
     size(Sample).

 %% Benchmark driver: N times over, calls ?MODULE:Fun(M) for every month
 %% number M in 1..12 and discards the results. Returns ok.
 gm(0, _Fun) ->
     ok;
 gm(N, Fun) ->
     _ = [?MODULE:Fun(M) || M <- lists:seq(1, 12)],
     gm(N - 1, Fun).

 %% Maps a month number (1..12) to its abbreviated English name using one
 %% function clause per month. Author's note: this variant is the faster one.
 getMonth(1)  -> <<"Jan">>;
 getMonth(2)  -> <<"Feb">>;
 getMonth(3)  -> <<"Mar">>;
 getMonth(4)  -> <<"Apr">>;
 getMonth(5)  -> <<"May">>;
 getMonth(6)  -> <<"Jun">>;
 getMonth(7)  -> <<"Jul">>;
 getMonth(8)  -> <<"Aug">>;
 getMonth(9)  -> <<"Sep">>;
 getMonth(10) -> <<"Oct">>;
 getMonth(11) -> <<"Nov">>;
 getMonth(12) -> <<"Dec">>.

 %% Maps a month number (1..12) to its abbreviated English name by indexing
 %% into a constant 12-element tuple.
 getMonth2(Month) ->
     Names = {<<"Jan">>, <<"Feb">>, <<"Mar">>, <<"Apr">>, <<"May">>, <<"Jun">>,
              <<"Jul">>, <<"Aug">>, <<"Sep">>, <<"Oct">>, <<"Nov">>, <<"Dec">>},
     element(Month, Names).

 %% Month number => abbreviated English month name, used by getMonth3/1.
 -define(Month, #{
     1 => <<"Jan">>, 2 => <<"Feb">>, 3 => <<"Mar">>, 4 => <<"Apr">>,
     5 => <<"May">>, 6 => <<"Jun">>, 7 => <<"Jul">>, 8 => <<"Aug">>,
     9 => <<"Sep">>, 10 => <<"Oct">>, 11 => <<"Nov">>, 12 => <<"Dec">>
 }).

 %% Looks Month up in the ?Month map by pattern matching; returns the empty
 %% binary when Month is not one of the keys.
 getMonth3(Month) ->
     case ?Month of
         #{Month := Name} -> Name;
         _Missing -> <<"">>
     end.