From 52284f61ab333ec199774b38340a35eeef5fa94c Mon Sep 17 00:00:00 2001 From: Dmitry Zuikov Date: Thu, 25 May 2023 13:01:58 +0300 Subject: [PATCH] respawn process on error --- hbs2-peer/app/PeerMain.hs | 53 +++++++++++++++++++++++++++++++-------- hbs2-peer/app/RPC.hs | 5 +++- hbs2-peer/hbs2-peer.cabal | 1 + 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/hbs2-peer/app/PeerMain.hs b/hbs2-peer/app/PeerMain.hs index 46860edf..9218ff7b 100644 --- a/hbs2-peer/app/PeerMain.hs +++ b/hbs2-peer/app/PeerMain.hs @@ -60,15 +60,16 @@ import Crypto.Saltine (sodiumInit) import Data.ByteString.Lazy (ByteString) import Data.ByteString.Lazy qualified as LBS import Data.ByteString qualified as BS +import Data.Cache qualified as Cache import Data.Function import Data.List qualified as L import Data.Map qualified as Map import Data.Maybe import Data.Set qualified as Set import Data.Set (Set) +import Data.Text.Encoding qualified as TE import Data.Text qualified as Text import Data.Text (Text) -import Data.Text.Encoding qualified as TE import GHC.Stats import GHC.TypeLits import Lens.Micro.Platform @@ -79,7 +80,8 @@ import System.Exit import System.IO import System.Mem import System.Metrics -import Data.Cache qualified as Cache +import System.Posix.Process +import System.Environment import UnliftIO.Exception qualified as U -- import UnliftIO.STM @@ -167,7 +169,8 @@ makeLenses 'RPCOpt data RPCCommand = - POKE + DIE + | POKE | ANNOUNCE (Hash HbSync) | PING (PeerAddr L4Proto) (Maybe (Peer L4Proto)) | CHECK PeerNonce (PeerAddr L4Proto) (Hash HbSync) @@ -185,6 +188,7 @@ data PeerOpts = , _listenRpc :: Maybe String , _peerCredFile :: Maybe FilePath , _peerConfig :: Maybe FilePath + , _peerRespawn :: Maybe Bool } deriving stock (Data) @@ -234,6 +238,7 @@ runCLI = join . customExecParser (prefs showHelpOnError) $ parser = hsubparser ( command "init" (info pInit (progDesc "creates default config")) <> command "run" (info pRun (progDesc "run peer")) <> command "poke" (info pPoke (progDesc "poke peer by rpc")) + <> command "die" (info pDie (progDesc "die cmd")) <> command "announce" (info pAnnounce (progDesc "announce block")) <> command "ping" (info pPing (progDesc "ping another peer")) <> command "fetch" (info pFetch (progDesc "fetch block")) @@ -262,7 +267,9 @@ runCLI = join . customExecParser (prefs showHelpOnError) $ c <- optional confOpt - pure $ PeerOpts pref l r k c + resp <- optional $ flag' True ( long "respawn" <> short 'r' <> help "respawn process") + + pure $ PeerOpts pref l r k c resp pRun = do runPeer <$> common @@ -271,6 +278,10 @@ runCLI = join . customExecParser (prefs showHelpOnError) $ RPCOpt <$> optional confOpt <*> optional rpcOpt + pDie = do + rpc <- pRpcCommon + pure $ runRpcCommand rpc DIE + pPoke = do rpc <- pRpcCommon pure $ runRpcCommand rpc POKE @@ -358,7 +369,7 @@ runCLI = join . customExecParser (prefs showHelpOnError) $ myException :: SomeException -> IO () -myException e = err ( show e ) >> notice "RESTARTING..." +myException e = err ( show e ) newtype CredentialsM e s m a = @@ -417,17 +428,28 @@ instance ( Monad m response = lift . response --- runPeer :: forall e . (e ~ UDP, Nonce (RefLogUpdate e) ~ BS.ByteString) => PeerOpts -> IO () +respawn :: PeerOpts -> IO () +respawn opts = case view peerRespawn opts of + Just True -> do + let secs = 5 + notice $ "RESPAWNING in" <+> viaShow secs <> "s" + pause @'Seconds secs + self <- getExecutablePath + args <- getArgs + print (self, args) + executeFile self False args Nothing + + _ -> exitFailure + runPeer :: forall e s . ( e ~ L4Proto , FromStringMaybe (PeerAddr e) , s ~ Encryption e ) => PeerOpts -> IO () -runPeer opts = fix \me -> U.handle (\e -> myException e - >> performGC - >> pause @'Seconds 10 - >> me - ) $ runResourceT do +runPeer opts = U.handle (\e -> myException e + >> performGC + >> respawn opts + ) $ runResourceT do metrics <- liftIO newStore @@ -867,6 +889,8 @@ runPeer opts = fix \me -> U.handle (\e -> myException e void $ liftIO $ waitAnyCancel workers + let dieAction _ = do + liftIO $ die "received die command" let pokeAction _ = do who <- thatPeer (Proxy @(RPC e)) @@ -945,6 +969,7 @@ runPeer opts = fix \me -> U.handle (\e -> myException e request who (RPCRefLogGetAnswer @e h) let arpc = RpcAdapter pokeAction + dieAction dontHandle dontHandle annAction @@ -1035,6 +1060,7 @@ withRPC o cmd = rpcClientMain o $ runResourceT do let adapter = RpcAdapter dontHandle + dontHandle (liftIO . atomically . writeTQueue pokeQ) (liftIO . atomically . writeTQueue pokeFQ) (const $ liftIO exitSuccess) @@ -1074,6 +1100,10 @@ withRPC o cmd = rpcClientMain o $ runResourceT do exitSuccess + RPCDie{} -> do + pause @'Seconds 0.25 + liftIO exitSuccess + RPCPoke{} -> do let onTimeout = do pause @'Seconds 1.5 Log.info "no-one-is-here" @@ -1117,6 +1147,7 @@ withRPC o cmd = rpcClientMain o $ runResourceT do runRpcCommand :: FromStringMaybe (IPAddrPort L4Proto) => RPCOpt -> RPCCommand -> IO () runRpcCommand opt = \case + DIE -> withRPC opt RPCDie POKE -> withRPC opt RPCPoke PING s _ -> withRPC opt (RPCPing s) ANNOUNCE h -> withRPC opt (RPCAnnounce h) diff --git a/hbs2-peer/app/RPC.hs b/hbs2-peer/app/RPC.hs index 9393ff25..2cbbf0ad 100644 --- a/hbs2-peer/app/RPC.hs +++ b/hbs2-peer/app/RPC.hs @@ -23,7 +23,8 @@ data SetLogging = instance Serialise SetLogging data RPC e = - RPCPoke + RPCDie + | RPCPoke | RPCPing (PeerAddr e) | RPCPong (PeerAddr e) | RPCPokeAnswer (PubKey 'Sign (Encryption e)) @@ -59,6 +60,7 @@ makeLenses 'RPCEnv data RpcAdapter e m = RpcAdapter { rpcOnPoke :: RPC e -> m () + , rpcOnDie :: RPC e -> m () , rpcOnPokeAnswer :: PubKey 'Sign (Encryption e) -> m () , rpcOnPokeAnswerFull :: Text -> m () , rpcOnAnnounce :: Hash HbSync -> m () @@ -112,6 +114,7 @@ rpcHandler :: forall e m . ( MonadIO m => RpcAdapter e m -> RPC e -> m () rpcHandler adapter = \case + p@RPCDie{} -> rpcOnDie adapter p p@RPCPoke{} -> rpcOnPoke adapter p (RPCPokeAnswer k) -> rpcOnPokeAnswer adapter k (RPCPokeAnswerFull k) -> rpcOnPokeAnswerFull adapter k diff --git a/hbs2-peer/hbs2-peer.cabal b/hbs2-peer/hbs2-peer.cabal index 89a17d2c..0e2aceb8 100644 --- a/hbs2-peer/hbs2-peer.cabal +++ b/hbs2-peer/hbs2-peer.cabal @@ -64,6 +64,7 @@ common common-deps , http-types , wai-extra , unliftio + , unix common shared-properties ghc-options: