Skip to content

Commit 9f777ad

Browse files
committed
reuse existing listen socket if available during a grpcbox_socket restart
1 parent 033dbfe commit 9f777ad

File tree

1 file changed

+66
-5
lines changed

1 file changed

+66
-5
lines changed

src/grpcbox_socket.erl

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,27 +16,88 @@
1616
start_link(Pool, ListenOpts, AcceptorOpts) ->
1717
gen_server:start_link(?MODULE, [Pool, ListenOpts, AcceptorOpts], []).
1818

19+
1920
%% gen_server api
2021

2122
init([Pool, ListenOpts, PoolOpts]) ->
23+
%% Trapping exit so can close socket in terminate/2
24+
_ = process_flag(trap_exit, true),
2225
Port = maps:get(port, ListenOpts, 8080),
2326
IPAddress = maps:get(ip, ListenOpts, {0, 0, 0, 0}),
2427
AcceptorPoolSize = maps:get(size, PoolOpts, 10),
2528
SocketOpts = maps:get(socket_options, ListenOpts, [{reuseaddr, true},
2629
{nodelay, true},
27-
{reuseaddr, true},
2830
{backlog, 32768},
2931
{keepalive, true}]),
30-
%% Trapping exit so can close socket in terminate/2
31-
_ = process_flag(trap_exit, true),
3232
Opts = [{active, false}, {mode, binary}, {packet, raw}, {ip, IPAddress} | SocketOpts],
3333
case gen_tcp:listen(Port, Opts) of
3434
{ok, Socket} ->
3535
%% acceptor could close the socket if there is a problem
3636
MRef = monitor(port, Socket),
37-
grpcbox_pool:accept_socket(Pool, Socket, AcceptorPoolSize),
37+
{ok, _} = grpcbox_pool:accept_socket(Pool, Socket, AcceptorPoolSize),
3838
{ok, {Socket, MRef}};
39-
{error, Reason} ->
39+
{error, eaddrinuse} ->
40+
%% our desired port is already in use
41+
%% its likely this grpcbox_socket server is restarting
42+
%% previously it would have bound to the port before passing control to our acceptor pool
43+
%% in the restart scenario, the socket process would attempt to bind again
44+
%% to the port and then stop, the sup would keep restarting it
45+
%% and we would end up breaching the restart strategy of the parent sup
46+
%% eventually taking down the entire tree
47+
%% result of which is we have no active listener and grpcbox is effectively down
48+
%% so now if we hit eaddrinuse, we check if our acceptor pool is already the
49+
%% controlling process, if so we reuse the port from its state and
50+
%% allow grpcbox_socket to start cleanly
51+
52+
%% NOTE: acceptor_pool has a grace period for connections before it terminates
53+
%% grpcbox_pool sets this to a default of 5 secs
54+
%% this needs considered when deciding on related supervisor restart strategies
55+
%% AND keep in mind the acceptor pool will continue accepting new connections
56+
%% during this grace period
57+
58+
%% Other possible fixes here include changing the grpcbox_services_sup from its
59+
%% rest_for_one to a one_for_all strategy. This ensures the pool and thus the
60+
%% current controlling process of the socket is terminated
61+
%% and allows things to restart cleanly if grpcbox_socket dies
62+
%% the disadvantage there however is we will drop all existing grpc connections
63+
64+
%% Another possible fix is to play with the restart strategy intensity and periods
65+
%% and ensure the top level sup doesnt get breached but...
66+
%% a requirement will be to ensure the grpcbox_service_sup forces a restart
67+
%% of grpcbox_pool and therefore the acceptor_pool process
68+
%% as only by doing that will be free up the socket and allow grpcbox_socket to rebind
69+
%% thus we end up terminating any existing grpc connections
70+
71+
%% Yet another possible fix is to move the cleanup of closing the socket
72+
%% out of grpcbox_socket's terminate and into acceptor_pool's terminate
73+
%% that however puts two way co-ordination between two distinct libs
74+
%% which is far from ideal and in addition will also result in existing grpc connection
75+
%% being dropped
76+
77+
%% my view is, if at all possible, its better to restart the grpcbox_socket process without
78+
%% impacting existing connections, the fix below allows for that, albeit a lil messy
79+
%% there is most likely a better solution to all of this, TODO: revisit
80+
81+
%% get the current sockets in use by the acceptor pool
82+
%% if one is bound to our target port then reuse
83+
%% need to allow for possibility of multiple services, each with its own socket
84+
%% so we need to identify our interested socket via port number
85+
PoolSockets = grpcbox_pool:pool_sockets(Pool),
86+
MaybeHaveExistingSocket =
87+
lists:foldl(
88+
fun({inet_tcp, {_IP, BoundPortNumber}, Socket, _SockRef}, _Acc) when BoundPortNumber =:= Port ->
89+
{ok, Socket};
90+
(_, Acc) ->
91+
Acc
92+
end, {error, eaddrinuse}, PoolSockets),
93+
case MaybeHaveExistingSocket of
94+
{ok, Socket} ->
95+
MRef = monitor(port, Socket),
96+
{ok, {Socket, MRef}};
97+
{error, Reason} ->
98+
{stop, Reason}
99+
end;
100+
{error, Reason}->
40101
{stop, Reason}
41102
end.
42103

0 commit comments

Comments
 (0)