error: Some appliance properly components are not functioning.<Timestamp> WARN WrapperStartStopAppMain ChainReplicationProtocol 1207778 fillHole[Token(epoch=79, sequence=3785450026)]: chain head 1/3<Timestamp> WARN netty-1 ClientResponseHandler 1207778 Server threw exception for SERVER_ERROR with request_id: 621<Timestamp> WARN WrapperStartStopAppMain ChainReplicationProtocol 1207778 Recover[Token(epoch=79, sequence=3785450026)]: read chain head 1/3<Timestamp> WARN netty-0 ClientResponseHandler 1207778 Server threw exception for SERVER_ERROR with request_id: 555<Timestamp> WARN WrapperStartStopAppMain AbstractView 1207778 Timeout executing remote call, invalidating view and retrying in PT1Ss<Timestamp> INFO WrapperStartStopAppMain AbstractView 1207778 layoutHelper: Retried 61 times, SystemDownHandlerTriggerLimit = 60<Timestamp> INFO WrapperStartStopAppMain AbstractView 1207778 layoutHelper: Invoking the systemDownHandler.<Timestamp> WARN WrapperStartStopAppMain DataStoreDisconnectHandler 1207778 - [nsx@6876 comp="nsx-manager" level="WARNING" subcomp="manager"] Disconnected from the database, restarting the service/var/log/corfu/corfu.9000.log<Timestamp> | ERROR | worker-1 | o.c.i.NettyServerRouter | Error in handling inbound messageio.netty.util.internal.OutOfDirectMemoryError: failed to allocate NNNNNNN byte(s) of direct memory (used: NNNNNNNNNN, max: 2013265920) at io.netty.util.internal.PlatformDependent.incrementMemoryCounter(PlatformDependent.java:806) at io.netty.util.internal.PlatformDependent.allocateDirectNoCleaner(PlatformDependent.java:735) at io.netty.buffer.PoolArena$DirectArena.allocateDirect(PoolArena.java:649) at io.netty.buffer.PoolArena$DirectArena.newChunk(PoolArena.java:624) at io.netty.buffer.PoolArena.allocateNormal(PoolArena.java:203) at io.netty.buffer.PoolArena.tcacheAllocateSmall(PoolArena.java:173) at io.netty.buffer.PoolArena.allocate(PoolArena.java:134) at io.netty.buffer.PoolArena.allocate(PoolArena.java:126) at 
io.netty.buffer.PooledByteBufAllocator.newDirectBuffer(PooledByteBufAllocator.java:396) at io.netty.buffer.AbstractByteBufAllocator.directBuffer(AbstractByteBufAllocator.java:188) at io.netty.buffer.AbstractByteBufAllocator.directBuffer(AbstractByteBufAllocator.java:179) at io.netty.handler.ssl.SslHandler$SslEngineType$1.allocateWrapBuffer(SslHandler.java:231) at io.netty.handler.ssl.SslHandler.allocateOutNetBuf(SslHandler.java:2257) at io.netty.handler.ssl.SslHandler.wrap(SslHandler.java:824) at io.netty.handler.ssl.SslHandler.wrapAndFlush(SslHandler.java:798) at io.netty.handler.ssl.SslHandler.flush(SslHandler.java:779) at io.netty.channel.AbstractChannelHandlerContext.invokeFlush0(AbstractChannelHandlerContext.java:750) at io.netty.channel.AbstractChannelHandlerContext.invokeFlush(AbstractChannelHandlerContext.java:742) at io.netty.channel.AbstractChannelHandlerContext.flush(AbstractChannelHandlerContext.java:728) at io.netty.channel.ChannelOutboundHandlerAdapter.flush(ChannelOutboundHandlerAdapter.java:125) at io.netty.channel.AbstractChannelHandlerContext.invokeFlush0(AbstractChannelHandlerContext.java:750) at io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:765) at io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071) at io.netty.util.concurrent.AbstractEventExecutor.runTask(AbstractEventExecutor.java:174) at io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:167) at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:470) at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:503) at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:995) at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) at java.lang.Thread.run(Thread.java:750)/var/log/corfu/tanuki.log shows BatchProcessor thread is stuck."LogUnit-BatchProcessor-0" 
#31 prio=5 os_prio=0 tid=0x000078bbd40f4800 nid=0x13886 waiting on condition [0x000078bbcf59e000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x000078bc7d93c600> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039) at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442) at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1074) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1134) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:750)"LogUnit-BatchProcessor-0" #31 prio=5 os_prio=0 tid=0x00006ee838288000 nid=0x138b9 waiting on condition [0x00006ee87c543000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00006ee8ec3bc3a0> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039) at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442) at org.corfudb.infrastructure.BatchProcessor.process(BatchProcessor.java:120) at org.corfudb.infrastructure.BatchProcessor$$Lambda$195/1808390842.run(Unknown Source) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:750)VMware NSX
An io.netty.util.internal.OutOfDirectMemoryError causes the Corfu BatchProcessor thread to become stuck, which in turn causes the whole Corfu cluster to stop working.
In NSX 4.2.0 and later, Corfu automatically restarts and recovers from this condition.
To recover Corfu on NSX versions earlier than 4.2.0, reboot the NSX Manager node on which the Corfu BatchProcessor thread is stuck.