From 4f743d361c2e84420d02d9c9d10304b7184b3170 Mon Sep 17 00:00:00 2001 From: Nick Hudson Date: Tue, 23 Jul 2013 12:07:56 +0100 Subject: Re-implement JavaConnection to use Selector When investigating problems on Solaris, attention focused on the JavaConnection class, whose implementation appeared to be non-optimal. The original implementation had a loop which operated on a non-blocking socket, and looked something like this: while (!disconnecting) { while (something to write) { write data to socket; if write failed { sleep(100); // and try again } } try reading data from socket if (any data was read) { process data from socket; } sleep(100); } Because the socket is non-blocking, the reads/writes return straight away. This means that even when no data is being transferred, the loop is executing around ten times a second checking for any data to read/write. In one case (Solaris client talking to Solaris server on the same VM) we were consistently able to get into a state where a write fails to write any data, so that the "something to write" subloop never exits. This in turn means that the "try reading data" section of the main loop is never reached. Investigation failed to uncover why this problem occurs. The underlying socket appears to be returning EAGAIN (equivalent to EWOULDBLOCK), suggesting that the write fails because the client's local buffer is full. This in turn implies that the server isn't reading data quickly enough, leading to the buffers on the client side being full up. But this doesn't explain why, once things have got into this state, they never free up. At any rate, it was felt that the implementation above is not ideal because it is relying on a polling mechanism that is not efficient, rather than being event driven. So this change re-implements JavaConnection to use a Selector, which means that the main loop is event-driven. The new implementation looks like this while (!disconnected) { wait for selector if (disconnected) { break; } if something to write { try to write data; } if something to read { try to read data; } if still something to write { sleep(100); post wake event; // so that next wait completes straight away } } Test-information: Testing appears to show that the problems we saw on Solaris are no longer seen with this patch (Solaris tests still fail, but later on, which appears to be due to a separate problem). Testing shows that this leads to the thread spending much more time idle, and only being active when data is being read/written (unlike the original implementation which was looping ten times a second regardless of whether any data was being read/written). Testing using MLC seems to show the new implementation works OK. I was unable to provoke the "write buffer not completely written" case, so faked it by making the doWrite() method constrain its maximum write size to 200 bytes. By doing this I verified that the "leftOver" section of code was working properly (and incidentally fixed a problem with the the initial implementation of the patch that had been passing the wrong parameter to System.arrayCopy). Change-Id: I5a6191567ba7e9afdb9a26febf00eae72b00f6eb Signed-off-by: Nick Hudson diff --git a/src/com/isode/stroke/network/JavaConnection.java b/src/com/isode/stroke/network/JavaConnection.java index 7814f71..efbf54f 100644 --- a/src/com/isode/stroke/network/JavaConnection.java +++ b/src/com/isode/stroke/network/JavaConnection.java @@ -3,7 +3,7 @@ * All rights reserved. */ /* - * Copyright (c) 2010-2012, Isode Limited, London, England. + * Copyright (c) 2010-2013, Isode Limited, London, England. * All rights reserved. */ package com.isode.stroke.network; @@ -12,6 +12,8 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.net.Socket; import java.nio.ByteBuffer; +import java.nio.channels.SelectionKey; +import java.nio.channels.Selector; import java.nio.channels.SocketChannel; import java.util.ArrayList; import java.util.Collections; @@ -32,6 +34,10 @@ public class JavaConnection extends Connection implements EventOwner { public Worker(HostAddressPort address) { address_ = address; } + + private boolean isWriteNeeded() { + return (!writeBuffer_.isEmpty()); + } public void run() { try { @@ -42,79 +48,87 @@ public class JavaConnection extends Connection implements EventOwner { * isn't what we want */ socketChannel_.configureBlocking(false); + selector_ = Selector.open(); + selectionKey_ = socketChannel_.register(selector_, SelectionKey.OP_READ); } catch (IOException ex) { handleConnected(true); return; } handleConnected(false); while (!disconnecting_) { - while (!writeBuffer_.isEmpty()) { - ByteArray data = writeBuffer_.get(0); - ByteBuffer byteBuffer = ByteBuffer.wrap(data.getData()); - try { - /* Because the SocketChannel is non-blocking, we have to - * be prepared to cope with the write operation not - * consuming all of the data - */ - boolean finishedWriting = false; - while (!finishedWriting && !disconnecting_) { - socketChannel_.write(byteBuffer); - finishedWriting = (byteBuffer.remaining() == 0); - if (!finishedWriting) { - try { - /* Give the output buffer a chance to empty */ - Thread.sleep(100); - } - catch (InterruptedException e) { - /* Perhaps someone has set disconnecting_ */ - } - } - } - } catch (IOException ex) { - disconnecting_ = true; - handleDisconnected(Error.WriteError); - } - writeBuffer_.remove(0); - } - ByteArray data = new ByteArray(); + /* This will block until something is ready on the selector, + * including someone calling selector.wakeup(), or until the + * thread is interrupted + */ try { - ByteBuffer byteBuffer = ByteBuffer.allocate(1024); - - int count = socketChannel_.read(byteBuffer); - while (count > 0) { - byteBuffer.flip(); - byte[] result = new byte[byteBuffer.remaining()]; - byteBuffer.get(result); - byteBuffer.compact(); - for (int i=0; i 0) { + byteBuffer.flip(); + byte[] result = new byte[byteBuffer.remaining()]; + byteBuffer.get(result); + byteBuffer.compact(); + for (int i=0; i