 start_link(Pool, ListenOpts, AcceptorOpts) ->
     gen_server:start_link(?MODULE, [Pool, ListenOpts, AcceptorOpts], []).
 
+
 %% gen_server api
 
 init([Pool, ListenOpts, PoolOpts]) ->
+    %% Trap exits so we can close the socket in terminate/2
+    _ = process_flag(trap_exit, true),
     Port = maps:get(port, ListenOpts, 8080),
     IPAddress = maps:get(ip, ListenOpts, {0, 0, 0, 0}),
     AcceptorPoolSize = maps:get(size, PoolOpts, 10),
     SocketOpts = maps:get(socket_options, ListenOpts, [{reuseaddr, true},
                                                        {nodelay, true},
-                                                       {reuseaddr, true},
                                                        {backlog, 32768},
                                                        {keepalive, true}]),
-    %% Trapping exit so can close socket in terminate/2
-    _ = process_flag(trap_exit, true),
     Opts = [{active, false}, {mode, binary}, {packet, raw}, {ip, IPAddress} | SocketOpts],
     case gen_tcp:listen(Port, Opts) of
         {ok, Socket} ->
             %% acceptor could close the socket if there is a problem
             MRef = monitor(port, Socket),
-            grpcbox_pool:accept_socket(Pool, Socket, AcceptorPoolSize),
+            {ok, _} = grpcbox_pool:accept_socket(Pool, Socket, AcceptorPoolSize),
             {ok, {Socket, MRef}};
-        {error, Reason} ->
+        {error, eaddrinuse} ->
+            %% our desired port is already in use
+            %% it's likely this grpcbox_socket server is restarting:
+            %% previously it bound the port and then passed control to our acceptor pool
+            %% in the restart scenario the socket process would attempt to bind the port
+            %% again and then stop, the sup would keep restarting it,
+            %% we would end up breaching the restart strategy of the parent sup
+            %% and eventually take down the entire tree
+            %% the result being no active listener, so grpcbox is effectively down
+            %% so now if we hit eaddrinuse, we check whether our acceptor pool is already
+            %% the controlling process of a socket bound to our port; if so we reuse that
+            %% socket from its state and allow grpcbox_socket to start cleanly
+
+            %% NOTE: acceptor_pool has a grace period for connections before it terminates;
+            %% grpcbox_pool sets this to a default of 5 seconds
+            %% this needs to be considered when deciding on related supervisor restart
+            %% strategies, AND keep in mind the acceptor pool will continue accepting new
+            %% connections during this grace period
+            %% Other possible fixes here include changing grpcbox_services_sup from its
+            %% rest_for_one strategy to one_for_all. That ensures the pool, and thus the
+            %% current controlling process of the socket, is terminated too,
+            %% allowing things to restart cleanly if grpcbox_socket dies;
+            %% the disadvantage, however, is that we would drop all existing grpc connections
+
+            %% Another possible fix is to tune the restart strategy intensity and period
+            %% so that the top level sup doesn't get breached, but...
+            %% a requirement would be to ensure grpcbox_services_sup forces a restart
+            %% of grpcbox_pool, and therefore of the acceptor_pool process,
+            %% as only by doing that will we free up the socket and allow grpcbox_socket
+            %% to rebind; thus we again end up terminating any existing grpc connections
+
+            %% Yet another possible fix is to move the cleanup of closing the socket
+            %% out of grpcbox_socket's terminate and into acceptor_pool's terminate;
+            %% that, however, introduces two-way coordination between two distinct libs,
+            %% which is far from ideal, and in addition will also result in existing
+            %% grpc connections being dropped
+
+            %% my view is that, if at all possible, it's better to restart the
+            %% grpcbox_socket process without impacting existing connections; the fix
+            %% below allows for that, albeit a little messily
+            %% there is most likely a better solution to all of this, TODO: revisit
+
+            %% get the current sockets in use by the acceptor pool;
+            %% if one is bound to our target port then reuse it
+            %% we need to allow for the possibility of multiple services, each with its
+            %% own socket, so we identify the socket we care about via its port number
+            PoolSockets = grpcbox_pool:pool_sockets(Pool),
+            MaybeHaveExistingSocket =
+                lists:foldl(
+                    fun({inet_tcp, {_IP, BoundPortNumber}, Socket, _SockRef}, _Acc)
+                          when BoundPortNumber =:= Port ->
+                            {ok, Socket};
+                       (_, Acc) ->
+                            Acc
+                    end, {error, eaddrinuse}, PoolSockets),
+            case MaybeHaveExistingSocket of
+                {ok, Socket} ->
+                    MRef = monitor(port, Socket),
+                    {ok, {Socket, MRef}};
+                {error, Reason} ->
+                    {stop, Reason}
+            end;
+        {error, Reason} ->
             {stop, Reason}
     end.
 
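The comment at the top of init/1 notes that exits are trapped so terminate/2 can close the socket, and the monitor(port, Socket) call implies a matching 'DOWN' clause elsewhere in the module; neither callback is part of this hunk. A minimal sketch of the shape they typically take, assuming the {Socket, MRef} state tuple built above (illustrative only, not the module's actual code):

    %% Sketch: stop if the acceptor pool (or anything else) closes the listen socket.
    handle_info({'DOWN', MRef, port, Socket, Reason}, {Socket, MRef} = State) ->
        {stop, Reason, State}.

    %% Sketch: close the listen socket on shutdown so a restarted grpcbox_socket
    %% does not hit eaddrinuse when it tries to rebind.
    terminate(_Reason, {Socket, MRef}) ->
        true = demonitor(MRef, [flush]),   %% drop the port monitor and any pending 'DOWN'
        gen_tcp:close(Socket),             %% harmless if the socket is already closed
        ok.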
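For the supervisor-level alternatives weighed in the comments (one_for_all versus rest_for_one, and tuning restart intensity/period), the trade-off in OTP terms looks roughly like the sketch below. The module name, child specs, and arguments are hypothetical placeholders rather than grpcbox_services_sup's real code; the point is only where the strategy choice bites.

    -module(example_services_sup).   %% hypothetical stand-in, not grpcbox_services_sup
    -behaviour(supervisor).
    -export([start_link/3, init/1]).

    start_link(Pool, ListenOpts, PoolOpts) ->
        supervisor:start_link(?MODULE, [Pool, ListenOpts, PoolOpts]).

    init([Pool, ListenOpts, PoolOpts]) ->
        %% one_for_all: if grpcbox_socket dies, the pool -- the controlling process of
        %% the bound socket -- restarts with it, so the new socket server can rebind;
        %% the cost is dropping every live gRPC connection.
        %% rest_for_one (pool started first) restarts only the socket server, which then
        %% hits eaddrinuse because the pool still holds the listen socket.
        SupFlags = #{strategy => one_for_all,
                     intensity => 5,    %% restarts tolerated ...
                     period => 10},     %% ... within this many seconds
        PoolSpec = #{id => grpcbox_pool,
                     start => {grpcbox_pool, start_link, [Pool]}},   %% args illustrative
        SocketSpec = #{id => grpcbox_socket,
                       start => {grpcbox_socket, start_link, [Pool, ListenOpts, PoolOpts]}},
        {ok, {SupFlags, [PoolSpec, SocketSpec]}}.

The commit instead keeps the existing strategy and reuses the already-bound socket, which is why init/1 above asks grpcbox_pool for its sockets when it hits eaddrinuse.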