%% @doc Starts the socket listener as a linked gen_server.
%% Pool names the owning acceptor pool; ListenOpts carries port/ip/socket
%% options; AcceptorOpts is handed through to init/1 as the pool options.
start_link(Pool, ListenOpts, AcceptorOpts) ->
    Args = [Pool, ListenOpts, AcceptorOpts],
    gen_server:start_link(?MODULE, Args, []).
1818
19-
2019% % gen_server api
2120
2221init ([Pool , ListenOpts , PoolOpts ]) ->
23- % % Trapping exit so can close socket in terminate/2
24- _ = process_flag (trap_exit , true ),
2522 Port = maps :get (port , ListenOpts , 8080 ),
2623 IPAddress = maps :get (ip , ListenOpts , {0 , 0 , 0 , 0 }),
2724 AcceptorPoolSize = maps :get (size , PoolOpts , 10 ),
2825 SocketOpts = maps :get (socket_options , ListenOpts , [{reuseaddr , true },
2926 {nodelay , true },
27+ {reuseaddr , true },
3028 {backlog , 32768 },
3129 {keepalive , true }]),
30+ % % Trapping exit so can close socket in terminate/2
31+ _ = process_flag (trap_exit , true ),
3232 Opts = [{active , false }, {mode , binary }, {packet , raw }, {ip , IPAddress } | SocketOpts ],
3333 case gen_tcp :listen (Port , Opts ) of
3434 {ok , Socket } ->
@@ -38,48 +38,30 @@ init([Pool, ListenOpts, PoolOpts]) ->
3838 {ok , {Socket , MRef }};
3939 {error , eaddrinuse } ->
4040 % % our desired port is already in use
41- % % its likely this grpcbox_socket server is restarting
41+ % % it's likely this grpcbox_socket server has been killed (for reasons unknown) and is restarting
4242 % % previously it would have bound to the port before passing control to our acceptor pool
43+ % % the socket remains open
4344 % % in the restart scenario, the socket process would attempt to bind again
4445 % % to the port and then stop, the sup would keep restarting it
4546 % % and we would end up breaching the restart strategy of the parent sup
4647 % % eventually taking down the entire tree
4748 % % result of which is we have no active listener and grpcbox is effectively down
48- % % so now if we hit eaddrinuse, we check if our acceptor pool is already the
49- % % controlling process, if so we reuse the port from its state and
50- % % allow grpcbox_socket to start cleanly
49+ % % so now if we hit eaddrinuse, we check if our acceptor pool is using it
50+ % % if so we close the port here and stop this process
51+ % % NOTE: issuing stop in init won't trigger terminate and so we can't rely on
52+ % % the socket being closed there
53+ % % This allows the sup to restart things cleanly
54+ % % We could try to reuse the existing port rather than closing it
55+ % % but side effects were encountered there, so deliberately avoiding
5156
5257 % % NOTE: acceptor_pool has a grace period for connections before it terminates
5358 % % grpcbox_pool sets this to a default of 5 secs
5459 % % this needs to be considered when deciding on related supervisor restart strategies
5560 % % AND keep in mind the acceptor pool will continue accepting new connections
5661 % % during this grace period
5762
58- % % Other possible fixes here include changing the grpcbox_services_sup from its
59- % % rest_for_one to a one_for_all strategy. This ensures the pool and thus the
60- % % current controlling process of the socket is terminated
61- % % and allows things to restart cleanly if grpcbox_socket dies
62- % % the disadvantage there however is we will drop all existing grpc connections
63-
64- % % Another possible fix is to play with the restart strategy intensity and periods
65- % % and ensure the top level sup doesnt get breached but...
66- % % a requirement will be to ensure the grpcbox_service_sup forces a restart
67- % % of grpcbox_pool and therefore the acceptor_pool process
68- % % as only by doing that will be free up the socket and allow grpcbox_socket to rebind
69- % % thus we end up terminating any existing grpc connections
70-
71- % % Yet another possible fix is to move the cleanup of closing the socket
72- % % out of grpcbox_socket's terminate and into acceptor_pool's terminate
73- % % that however puts two way co-ordination between two distinct libs
74- % % which is far from ideal and in addition will also result in existing grpc connection
75- % % being dropped
76-
77- % % my view is, if at all possible, its better to restart the grpcbox_socket process without
78- % % impacting existing connections, the fix below allows for that, albeit a lil messy
79- % % there is most likely a better solution to all of this, TODO: revisit
80-
8163 % % get the current sockets in use by the acceptor pool
82- % % if one is bound to our target port then reuse
64+ % % if one is bound to our target port then close it
8365 % % need to allow for possibility of multiple services, each with its own socket
8466 % % so we need to identify our interested socket via port number
8567 PoolSockets = grpcbox_pool :pool_sockets (Pool ),
@@ -89,15 +71,15 @@ init([Pool, ListenOpts, PoolOpts]) ->
8971 {ok , Socket };
9072 (_ , Acc ) ->
9173 Acc
92- end , { error , eaddrinuse } , PoolSockets ),
74+ end , socket_not_found , PoolSockets ),
9375 case MaybeHaveExistingSocket of
9476 {ok , Socket } ->
95- MRef = monitor ( port , Socket ),
96- { ok , { Socket , MRef }};
97- { error , Reason } ->
98- { stop , Reason }
99- end ;
100- {error , Reason }->
77+ gen_tcp : close ( Socket );
78+ socket_not_found ->
79+ noop
80+ end ,
81+ { stop , eaddrinuse } ;
82+ {error , Reason } ->
10183 {stop , Reason }
10284 end .
10385
@@ -115,9 +97,10 @@ handle_info(_, State) ->
%% @doc gen_server code_change callback: the state term is carried across
%% a hot code upgrade unchanged.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
11799
118- terminate (_ , {Socket , MRef }) ->
100+ terminate (_Reason , {Socket , MRef }) ->
119101 % % Socket may already be down but need to ensure it is closed to avoid
120102 % % eaddrinuse error on restart
103+ % % this takes care of that, unless of course this process is killed...
121104 case demonitor (MRef , [flush , info ]) of
122105 true -> gen_tcp :close (Socket );
123106 false -> ok
0 commit comments