From f38654a113fb5ee551384f69a50cf7ac157731cd Mon Sep 17 00:00:00 2001 From: fwsGonzo Date: Tue, 20 Nov 2018 12:41:50 +0100 Subject: [PATCH 01/93] microLB: Add extra asserts in waiting list --- lib/microLB/micro_lb/balancer.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index 69cdb7e0ca..a6e512385a 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -59,6 +59,7 @@ namespace microLB } void Balancer::incoming(net::Stream_ptr conn) { + assert(conn != nullptr); queue.emplace_back(std::move(conn)); LBOUT("Queueing connection (q=%lu)\n", queue.size()); // IMPORTANT: try to handle queue, in case its ready @@ -71,6 +72,7 @@ namespace microLB while (nodes.pool_size() > 0 && queue.empty() == false) { auto& client = queue.front(); + assert(client.conn != nullptr); if (client.conn->is_connected()) { // NOTE: explicitly want to copy buffers net::Stream_ptr rval = @@ -125,14 +127,15 @@ namespace microLB Waiting::Waiting(net::Stream_ptr incoming) : conn(std::move(incoming)), total(0) { + assert(this->conn != nullptr); // queue incoming data from clients not yet // assigned to a node - conn->on_read(READQ_PER_CLIENT, + this->conn->on_read(READQ_PER_CLIENT, [this] (auto buf) { // prevent buffer bloat attack this->total += buf->size(); if (this->total > MAX_READQ_PER_NODE) { - conn->close(); + this->conn->close(); } else { LBOUT("*** Queued %lu bytes\n", buf->size()); From f426b6c69d7463c7b75f57b4fbdfdcb9c7353190 Mon Sep 17 00:00:00 2001 From: fwsGonzo Date: Sun, 25 Nov 2018 20:59:47 +0100 Subject: [PATCH 02/93] test: Make microLB test send 50Mb strings --- test/net/integration/microLB/server.js | 8 ++++++-- test/net/integration/microLB/test.py | 27 +++++++++++++------------- test/net/integration/microLB/vm.json | 2 +- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/test/net/integration/microLB/server.js 
b/test/net/integration/microLB/server.js index 530854bf1f..b6ea1bd9fb 100644 --- a/test/net/integration/microLB/server.js +++ b/test/net/integration/microLB/server.js @@ -1,5 +1,10 @@ var http = require('http'); +var dataString = function() { + var len = 1024*1024 * 50; + return '#'.repeat(len); +} + var stringToColour = function(str) { var hash = 0; for (var i = 0; i < str.length; i++) { @@ -17,8 +22,7 @@ var stringToColour = function(str) { function handleRequest(request, response){ response.setTimeout(500); var addr = request.connection.localPort; - var page = "" + addr; - response.end(page); + response.end(addr.toString() + dataString()); } http.createServer(handleRequest).listen(6001, '10.0.0.1'); diff --git a/test/net/integration/microLB/test.py b/test/net/integration/microLB/test.py index 9089f98bc2..24777b3f30 100755 --- a/test/net/integration/microLB/test.py +++ b/test/net/integration/microLB/test.py @@ -13,11 +13,12 @@ from vmrunner import vmrunner import requests +expected_string = "#" * 1024 * 1024 * 50 -def validateRequest(expected = ""): +def validateRequest(addr): response = requests.get('https://10.0.0.68:443', verify=False) - print (response.content) - return (response.content) == expected + #print (response.content) + return (response.content) == str(addr) + expected_string # start nodeJS pro = subprocess.Popen(["nodejs", "server.js"], stdout=subprocess.PIPE) @@ -25,15 +26,15 @@ def validateRequest(expected = ""): requests_completed = False def startBenchmark(line): print " starting test " - assert validateRequest("6001") - assert validateRequest("6002") - assert validateRequest("6003") - assert validateRequest("6004") - - assert validateRequest("6001") - assert validateRequest("6002") - assert validateRequest("6003") - assert validateRequest("6004") + assert validateRequest(6001) + assert validateRequest(6002) + assert validateRequest(6003) + assert validateRequest(6004) + + assert validateRequest(6001) + assert validateRequest(6002) + assert 
validateRequest(6003) + assert validateRequest(6004) print "Waiting for TCP MSL end..." global requests_completed requests_completed = True @@ -58,4 +59,4 @@ def cleanup(): vm.on_output("TCP MSL ended", mslEnded) # Boot the VM, taking a timeout as parameter -vm.cmake().boot(20).clean() +vm.cmake().boot(60).clean() diff --git a/test/net/integration/microLB/vm.json b/test/net/integration/microLB/vm.json index b8fe2d0910..85f68a14cf 100644 --- a/test/net/integration/microLB/vm.json +++ b/test/net/integration/microLB/vm.json @@ -6,5 +6,5 @@ {"device" : "virtio"}, {"device" : "virtio"} ], - "mem" : 512 + "mem" : 64 } From 565afc7386d891b21649f022e88393e852a6b797 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Tue, 27 Nov 2018 10:58:04 +0100 Subject: [PATCH 03/93] tcp: Change RCV WND value from a static value to a dynamic value based on global mem avail --- api/net/tcp/connection.hpp | 12 +++++++++++- api/net/tcp/tcp.hpp | 9 +++++++++ src/net/tcp/connection.cpp | 16 +++++++++++++++- src/net/tcp/tcp.cpp | 12 ++++++++++++ 4 files changed, 47 insertions(+), 2 deletions(-) diff --git a/api/net/tcp/connection.hpp b/api/net/tcp/connection.hpp index d4b6038b8e..0e8e284273 100644 --- a/api/net/tcp/connection.hpp +++ b/api/net/tcp/connection.hpp @@ -231,7 +231,8 @@ class Connection { SEQ_OUT_OF_ORDER, ACK_NOT_SET, ACK_OUT_OF_ORDER, - RST + RST, + RCV_WND_ZERO }; // < Drop_reason /** @@ -599,6 +600,11 @@ class Connection { */ void reset_callbacks(); + + using Recv_window_getter = delegate; + void set_recv_wnd_getter(Recv_window_getter func) + { recv_wnd_getter = func; } + private: /** "Parent" for Connection. 
*/ TCP& host_; @@ -626,6 +632,10 @@ class Connection { /** Round Trip Time Measurer */ RTTM rttm; + /** Function from where to retrieve + * the current recv window size for this connection */ + Recv_window_getter recv_wnd_getter; + /** Callbacks */ ConnectCallback on_connect_; DisconnectCallback on_disconnect_; diff --git a/api/net/tcp/tcp.hpp b/api/net/tcp/tcp.hpp index 826753b85f..08f8063542 100644 --- a/api/net/tcp/tcp.hpp +++ b/api/net/tcp/tcp.hpp @@ -518,6 +518,15 @@ namespace net { return this->cpu_id; } + /** + * @brief Return a value that's supposed to describe how much + * a connection should announce as it's RCV WND, + * with regards to the whole system. + * + * @return A RCV WND value, maximum 1GB + */ + static uint32_t global_recv_wnd(); + private: IPStack& inet_; Listeners listeners_; diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index d2abdaa3a4..aa56bd988d 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -36,6 +36,7 @@ Connection::Connection(TCP& host, Socket local, Socket remote, ConnectCallback c cb{host_.window_size()}, read_request(nullptr), writeq(), + recv_wnd_getter{TCP::global_recv_wnd}, on_connect_{std::move(callback)}, on_disconnect_({this, &Connection::default_on_disconnect}), rtx_timer({this, &Connection::rtx_timeout}), @@ -323,7 +324,9 @@ Packet_view_ptr Connection::create_outgoing_packet() // Set Destination (remote) packet->set_destination(remote_); - packet->set_win(std::min((cb.RCV.WND >> cb.RCV.wind_shift), (uint32_t)default_window_size)); + const auto recv_wnd = recv_wnd_getter(); + //printf("recv_wnd %u\n", recv_wnd); + packet->set_win(cb.RCV.WND >> cb.RCV.wind_shift); if(cb.SND.TS_OK) packet->add_tcp_option_aligned(host_.get_ts_value(), cb.get_ts_recent()); @@ -683,6 +686,15 @@ void Connection::recv_data(const Packet_view& in) { Expects(in.has_tcp_data()); + // just drop the packet if we don't have a recv wnd. 
+ // this is really awful and probably unnecesseary, + // since it could be that we already preallocated that memory in our vector. + // i also think we shouldn't reach this point due to State::check_seq checking + // if we're inside the window. if packet is out of order tho we can change the RCV wnd (i think). + if(recv_wnd_getter() == 0) + drop(in, Drop_reason::RCV_WND_ZERO); + + // Keep track if a packet is being sent during the async read callback const auto snd_nxt = cb.SND.NXT; @@ -719,6 +731,8 @@ void Connection::recv_data(const Packet_view& in) const auto recv = read_request->insert(in.seq(), in.tcp_data(), length, in.isset(PSH)); // this ensures that the data we ACK is actually put in our buffer. Ensures(recv == length); + // adjust the rcv wnd to (maybe) new value + cb.RCV.WND = recv_wnd_getter(); } } // Packet out of order diff --git a/src/net/tcp/tcp.cpp b/src/net/tcp/tcp.cpp index fb1ed06be5..2ccf8055f0 100644 --- a/src/net/tcp/tcp.cpp +++ b/src/net/tcp/tcp.cpp @@ -29,6 +29,7 @@ #include // nanos_now (get_ts_value) #include #include +#include using namespace std; using namespace net; @@ -254,6 +255,8 @@ void TCP::receive(Packet_view& packet) // No open connection found, find listener for destination debug(" No connection found - looking for listener..\n"); + // TODO: Avoid creating a new connection if we're running out of memory. + // something like "mem avail" > (max_buffer_limit * 3) maybe auto listener_it = find_listener(dest); // Listener found => Create Listener @@ -370,6 +373,15 @@ void TCP::reset_pmtu(Socket dest, IP4::PMTU pmtu) { } } +uint32_t TCP::global_recv_wnd() +{ + // 80% of free mem + // normalize to 0 to avoid negative value (???) + ssize_t avail = std::max((static_cast(OS::heap_avail()) * 80 / 100) / 2, 0); + //printf("heap: %zi avail: %zu\n", (ssize_t)OS::heap_avail(), avail); + return std::min(avail, (1 << 30)); // max can only be 1GB +} + void TCP::transmit(tcp::Packet_view_ptr packet) { // Generate checksum. 
From 86e30fea52fdf5ac9194e6a4ed82bb1d64ee309f Mon Sep 17 00:00:00 2001 From: fwsGonzo Date: Tue, 27 Nov 2018 11:55:23 +0100 Subject: [PATCH 04/93] microLB: Remove session timeouts, close_session properly --- lib/microLB/micro_lb/balancer.cpp | 43 +++--------------------------- lib/microLB/micro_lb/balancer.hpp | 6 +---- lib/microLB/micro_lb/serialize.cpp | 4 +-- 3 files changed, 7 insertions(+), 46 deletions(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index a6e512385a..3ecae04848 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -11,8 +11,6 @@ // connection attempt timeouts #define CONNECT_TIMEOUT 10s #define CONNECT_THROW_PERIOD 20s -#define INITIAL_SESSION_TIMEOUT 5s -#define ROLLING_SESSION_TIMEOUT 60s #define LB_VERBOSE 0 #if LB_VERBOSE @@ -214,7 +212,7 @@ namespace microLB return session_total; } int32_t Nodes::timed_out_sessions() const { - return session_timeouts; + return 0; } Session& Nodes::create_session(bool talk, net::Stream_ptr client, net::Stream_ptr outgoing) { @@ -239,21 +237,15 @@ namespace microLB assert(session.is_alive()); return session; } - void Nodes::close_session(int idx, bool timeout) + void Nodes::close_session(int idx) { auto& session = get_session(idx); - // disable timeout timer - if (session.timeout_timer != Timers::UNUSED_ID) { - Timers::stop(session.timeout_timer); - session.timeout_timer = Timers::UNUSED_ID; - } // remove connections session.incoming->reset_callbacks(); session.incoming = nullptr; session.outgoing->reset_callbacks(); session.outgoing = nullptr; // free session - if (timeout) this->session_timeouts++; free_sessions.push_back(session.self); session_cnt--; LBOUT("Session %d closed (total = %d)\n", session.self, session_cnt); @@ -381,53 +373,26 @@ namespace microLB : parent(n), self(idx), incoming(std::move(inc)), outgoing(std::move(out)) { - // if the client talked before it was assigned a session, use bigger timeout - auto timeout = 
(talk) ? ROLLING_SESSION_TIMEOUT : INITIAL_SESSION_TIMEOUT; - // session timeout timer - this->timeout_timer = Timers::oneshot(timeout, - [&nodes = n, this] (int) { - this->timeout(nodes); - }); incoming->on_read(READQ_PER_CLIENT, [this] (auto buf) { assert(this->is_alive()); - this->handle_timeout(); this->outgoing->write(buf); }); incoming->on_close( [&nodes = n, idx] () { - nodes.get_session(idx).outgoing->close(); - //nodes.get_session(idx).incoming->close(); + nodes.close_session(idx); }); outgoing->on_read(READQ_FOR_NODES, [this] (auto buf) { assert(this->is_alive()); - this->handle_timeout(); this->incoming->write(buf); }); outgoing->on_close( [&nodes = n, idx] () { - //nodes.get_session(idx).outgoing->close(); - nodes.get_session(idx).incoming->close(); + nodes.close_session(idx); }); } bool Session::is_alive() const { return incoming != nullptr; } - void Session::handle_timeout() - { - // stop old timer - Timers::stop(this->timeout_timer); - // create new timeout - this->timeout_timer = Timers::oneshot(ROLLING_SESSION_TIMEOUT, - [&nodes = parent, this] (int) { - this->timeout(nodes); - }); - } - void Session::timeout(Nodes& nodes) - { - assert(this->is_alive()); - this->timeout_timer = Timers::UNUSED_ID; - nodes.close_session(this->self, true); - } } diff --git a/lib/microLB/micro_lb/balancer.hpp b/lib/microLB/micro_lb/balancer.hpp index 0a83abc9cb..f1fd74fce1 100644 --- a/lib/microLB/micro_lb/balancer.hpp +++ b/lib/microLB/micro_lb/balancer.hpp @@ -23,13 +23,10 @@ namespace microLB struct Session { Session(Nodes&, int idx, bool talk, net::Stream_ptr in, net::Stream_ptr out); bool is_alive() const; - void handle_timeout(); - void timeout(Nodes&); void serialize(liu::Storage&); Nodes& parent; const int self; - int timeout_timer; net::Stream_ptr incoming; net::Stream_ptr outgoing; }; @@ -80,7 +77,7 @@ namespace microLB // returns the connection back if the operation fails net::Stream_ptr assign(net::Stream_ptr, queue_vector_t&); Session& create_session(bool 
talk, net::Stream_ptr inc, net::Stream_ptr out); - void close_session(int, bool timeout = false); + void close_session(int); Session& get_session(int); void serialize(liu::Storage&); @@ -90,7 +87,6 @@ namespace microLB nodevec_t nodes; int64_t session_total = 0; int session_cnt = 0; - int session_timeouts = 0; int conn_iterator = 0; int algo_iterator = 0; std::deque sessions; diff --git a/lib/microLB/micro_lb/serialize.cpp b/lib/microLB/micro_lb/serialize.cpp index 674b7d496e..f6b9619e03 100644 --- a/lib/microLB/micro_lb/serialize.cpp +++ b/lib/microLB/micro_lb/serialize.cpp @@ -15,7 +15,7 @@ namespace microLB void Nodes::serialize(Storage& store) { store.add(100, this->session_total); - store.add_int(100, this->session_timeouts); + //store.add_int(100, this->session_timeouts); store.put_marker(100); const int tot_sessions = sessions.size() - free_sessions.size(); @@ -48,7 +48,7 @@ namespace microLB { /// nodes member fields /// this->session_total = store.as_type(); store.go_next(); - this->session_timeouts = store.as_int(); store.go_next(); + //this->session_timeouts = store.as_int(); store.go_next(); store.pop_marker(100); /// sessions /// From 974847c37178551ef63f6a46ac6508e90b3730fa Mon Sep 17 00:00:00 2001 From: fwsGonzo Date: Tue, 27 Nov 2018 14:35:58 +0100 Subject: [PATCH 05/93] microLB: Option to disable active checks --- lib/microLB/micro_lb/autoconf.cpp | 11 +++- lib/microLB/micro_lb/balancer.cpp | 82 +++++++++++++++--------- lib/microLB/micro_lb/balancer.hpp | 23 ++++--- lib/microLB/micro_lb/openssl.cpp | 3 +- test/net/integration/microLB/config.json | 1 + 5 files changed, 75 insertions(+), 45 deletions(-) diff --git a/lib/microLB/micro_lb/autoconf.cpp b/lib/microLB/micro_lb/autoconf.cpp index 3c9c498b00..221b108791 100644 --- a/lib/microLB/micro_lb/autoconf.cpp +++ b/lib/microLB/micro_lb/autoconf.cpp @@ -29,9 +29,14 @@ namespace microLB (void) CLIENT_SLIMIT; auto& nodes = obj["nodes"]; + // node interface const int NODE_NET = nodes["iface"].GetInt(); 
auto& netout = net::Super_stack::get(NODE_NET); - netout.tcp().set_MSL(15s); + // node active-checks + bool use_active_check = true; + if (nodes.HasMember("active_check")) { + use_active_check = nodes["active_check"].GetBool(); + } Balancer* balancer = nullptr; @@ -39,13 +44,13 @@ namespace microLB { assert(clients.HasMember("key") && "TLS-enabled microLB must also have key"); // create TLS over TCP load balancer - balancer = new Balancer(netinc, CLIENT_PORT, netout, + balancer = new Balancer(netinc, CLIENT_PORT, netout, use_active_check, clients["certificate"].GetString(), clients["key"].GetString()); } else { // create TCP load balancer - balancer = new Balancer(netinc, CLIENT_PORT, netout); + balancer = new Balancer(netinc, CLIENT_PORT, netout, use_active_check); } auto& nodelist = nodes["list"]; diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index 3ecae04848..1d42fc3f3f 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -25,8 +25,10 @@ namespace microLB { Balancer::Balancer( netstack_t& incoming, uint16_t in_port, - netstack_t& outgoing) - : nodes(), netin(incoming), netout(outgoing), signal({this, &Balancer::handle_queue}) + netstack_t& outgoing, bool active_check) + : nodes(active_check), + netin(incoming), netout(outgoing), + signal({this, &Balancer::handle_queue}) { netin.tcp().listen(in_port, [this] (auto conn) { @@ -104,6 +106,7 @@ namespace microLB estimate = std::min(estimate, MAX_OUTGOING_ATTEMPTS); estimate = std::max(0, estimate - np_connecting); // create more outgoing connections + LBOUT("Estimated connections needed: %d\n", estimate); if (estimate > 0) { try { @@ -147,18 +150,29 @@ namespace microLB // temporary iterator for (int i = 0; i < total; i++) { + bool dest_found = false; // look for next active node up to *size* times for (size_t i = 0; i < nodes.size(); i++) { - int iter = conn_iterator; + const int iter = conn_iterator; conn_iterator = (conn_iterator + 1) % 
nodes.size(); // if the node is active, connect immediately - bool is_active = nodes[iter].is_active(); - if (is_active) { - nodes[iter].connect(); + auto& dest_node = nodes[iter]; + if (dest_node.is_active()) { + dest_node.connect(); + dest_found = true; break; } } + // if no active node found, simply delegate to the next node + if (dest_found == false) + { + // with active-checks we can return here later when we get a connection + if (this->do_active_check) return; + const int iter = conn_iterator; + conn_iterator = (conn_iterator + 1) % nodes.size(); + nodes[iter].connect(); + } } } net::Stream_ptr Nodes::assign(net::Stream_ptr conn, queue_vector_t& readq) @@ -175,7 +189,7 @@ namespace microLB LBOUT("Assigning client to node %d (%s)\n", algo_iterator, outgoing->to_string().c_str()); auto& session = this->create_session( - not readq.empty(), std::move(conn), std::move(outgoing)); + std::move(conn), std::move(outgoing)); // flush readq to session.outgoing for (auto buffer : readq) { LBOUT("*** Flushing %lu bytes\n", buffer->size()); @@ -214,15 +228,15 @@ namespace microLB int32_t Nodes::timed_out_sessions() const { return 0; } - Session& Nodes::create_session(bool talk, net::Stream_ptr client, net::Stream_ptr outgoing) + Session& Nodes::create_session(net::Stream_ptr client, net::Stream_ptr outgoing) { int idx = -1; if (free_sessions.empty()) { idx = sessions.size(); - sessions.emplace_back(*this, idx, talk, std::move(client), std::move(outgoing)); + sessions.emplace_back(*this, idx, std::move(client), std::move(outgoing)); } else { idx = free_sessions.back(); - new (&sessions[idx]) Session(*this, idx, talk, std::move(client), std::move(outgoing)); + new (&sessions[idx]) Session(*this, idx, std::move(client), std::move(outgoing)); free_sessions.pop_back(); } session_total++; @@ -251,18 +265,20 @@ namespace microLB LBOUT("Session %d closed (total = %d)\n", session.self, session_cnt); } - Node::Node(netstack_t& stk, net::Socket a, const pool_signal_t& sig) - : 
stack(stk), addr(a), pool_signal(sig) + Node::Node(netstack_t& stk, net::Socket a, const pool_signal_t& sig, bool da) + : stack(stk), pool_signal(sig), addr(a), do_active_check(da) { - // periodically connect to node and determine if active - // however, perform first check immediately - this->active_timer = Timers::periodic(0s, ACTIVE_CHECK_PERIOD, - [this] (int) { - this->perform_active_check(); - }); + if (this->do_active_check) + { + // periodically connect to node and determine if active + // however, perform first check immediately + this->active_timer = Timers::periodic(0s, ACTIVE_CHECK_PERIOD, + {this, &Node::perform_active_check}); + } } - void Node::perform_active_check() + void Node::perform_active_check(int) { + assert(this->do_active_check); try { this->stack.tcp().connect(this->addr, [this] (auto conn) { @@ -291,24 +307,28 @@ namespace microLB { // set as inactive this->active = false; - // begin checking active again - if (this->active_timer == Timers::UNUSED_ID) + if (this->do_active_check) { - this->active_timer = Timers::periodic( - ACTIVE_INITIAL_PERIOD, ACTIVE_CHECK_PERIOD, - [this] (int) { - this->perform_active_check(); - }); + // begin checking active again + if (this->active_timer == Timers::UNUSED_ID) + { + this->active_timer = Timers::periodic( + ACTIVE_INITIAL_PERIOD, ACTIVE_CHECK_PERIOD, + {this, &Node::perform_active_check}); + } } } void Node::stop_active_check() { // set as active this->active = true; - // stop active checking for now - if (this->active_timer != Timers::UNUSED_ID) { - Timers::stop(this->active_timer); - this->active_timer = Timers::UNUSED_ID; + if (this->do_active_check) + { + // stop active checking for now + if (this->active_timer != Timers::UNUSED_ID) { + Timers::stop(this->active_timer); + this->active_timer = Timers::UNUSED_ID; + } } } void Node::connect() @@ -368,7 +388,7 @@ namespace microLB } // use indexing to access Session because std::vector - Session::Session(Nodes& n, int idx, bool talk, + 
Session::Session(Nodes& n, int idx, net::Stream_ptr inc, net::Stream_ptr out) : parent(n), self(idx), incoming(std::move(inc)), outgoing(std::move(out)) diff --git a/lib/microLB/micro_lb/balancer.hpp b/lib/microLB/micro_lb/balancer.hpp index f1fd74fce1..41a6cb503a 100644 --- a/lib/microLB/micro_lb/balancer.hpp +++ b/lib/microLB/micro_lb/balancer.hpp @@ -21,7 +21,7 @@ namespace microLB struct Nodes; struct Session { - Session(Nodes&, int idx, bool talk, net::Stream_ptr in, net::Stream_ptr out); + Session(Nodes&, int idx, net::Stream_ptr in, net::Stream_ptr out); bool is_alive() const; void serialize(liu::Storage&); @@ -32,26 +32,28 @@ namespace microLB }; struct Node { - Node(netstack_t& stk, net::Socket a, const pool_signal_t&); + Node(netstack_t& stk, net::Socket a, const pool_signal_t&, bool do_active); auto address() const noexcept { return this->addr; } int connection_attempts() const noexcept { return this->connecting; } int pool_size() const noexcept { return pool.size(); } bool is_active() const noexcept { return active; }; + bool active_check() const noexcept { return do_active_check; } void restart_active_check(); - void perform_active_check(); + void perform_active_check(int); void stop_active_check(); void connect(); net::Stream_ptr get_connection(); netstack_t& stack; private: - net::Socket addr; const pool_signal_t& pool_signal; std::vector pool; + net::Socket addr; bool active = false; - int active_timer = -1; + const bool do_active_check; + signed int active_timer = -1; signed int connecting = 0; }; @@ -59,7 +61,7 @@ namespace microLB typedef std::deque nodevec_t; typedef nodevec_t::iterator iterator; typedef nodevec_t::const_iterator const_iterator; - Nodes() {} + Nodes(bool ac) : do_active_check(ac) {} size_t size() const noexcept; const_iterator begin() const; @@ -76,7 +78,7 @@ namespace microLB void create_connections(int total); // returns the connection back if the operation fails net::Stream_ptr assign(net::Stream_ptr, queue_vector_t&); - 
Session& create_session(bool talk, net::Stream_ptr inc, net::Stream_ptr out); + Session& create_session(net::Stream_ptr inc, net::Stream_ptr out); void close_session(int); Session& get_session(int); @@ -89,13 +91,14 @@ namespace microLB int session_cnt = 0; int conn_iterator = 0; int algo_iterator = 0; + const bool do_active_check; std::deque sessions; std::deque free_sessions; }; struct Balancer { - Balancer(netstack_t& in, uint16_t port, netstack_t& out); - Balancer(netstack_t& in, uint16_t port, netstack_t& out, + Balancer(netstack_t& in, uint16_t port, netstack_t& out, bool do_ac); + Balancer(netstack_t& in, uint16_t port, netstack_t& out, bool do_ac, const std::string& cert, const std::string& key); static Balancer* from_config(); @@ -129,6 +132,6 @@ namespace microLB template inline void Nodes::add_node(Args&&... args) { - nodes.emplace_back(std::forward (args)...); + nodes.emplace_back(std::forward (args)..., this->do_active_check); } } diff --git a/lib/microLB/micro_lb/openssl.cpp b/lib/microLB/micro_lb/openssl.cpp index 0c638a8be5..139f3fada0 100644 --- a/lib/microLB/micro_lb/openssl.cpp +++ b/lib/microLB/micro_lb/openssl.cpp @@ -10,9 +10,10 @@ namespace microLB netstack_t& in, uint16_t port, netstack_t& out, + const bool do_ac, const std::string& tls_cert, const std::string& tls_key) - : nodes(), netin(in), netout(out), signal({this, &Balancer::handle_queue}) + : nodes(do_ac), netin(in), netout(out), signal({this, &Balancer::handle_queue}) { fs::memdisk().init_fs( [] (fs::error_t err, fs::File_system&) { diff --git a/test/net/integration/microLB/config.json b/test/net/integration/microLB/config.json index f47fe36025..dffa9e09e8 100644 --- a/test/net/integration/microLB/config.json +++ b/test/net/integration/microLB/config.json @@ -27,6 +27,7 @@ }, "nodes" : { "iface" : 1, + "active_check" : true, "algo" : "round_robin", "list" : [ ["10.0.0.1", 6001], From 069dae5305b56aee9a92e0fd9e942d21dacab247 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Wed, 
28 Nov 2018 11:02:12 +0100 Subject: [PATCH 06/93] tcp: minor changes to window scaling --- api/net/tcp/common.hpp | 2 +- src/net/tcp/connection.cpp | 9 +++++---- src/net/tcp/tcp.cpp | 23 +++++++++++++++-------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/api/net/tcp/common.hpp b/api/net/tcp/common.hpp index c7a4fd3b82..690d16c4ff 100644 --- a/api/net/tcp/common.hpp +++ b/api/net/tcp/common.hpp @@ -32,7 +32,7 @@ namespace net { // default size of TCP window - how much data can be "in flight" (unacknowledged) static constexpr uint16_t default_window_size {0xffff}; // window scaling + window size - static constexpr uint8_t default_window_scaling {5}; + static constexpr uint8_t default_window_scaling {7}; static constexpr uint32_t default_ws_window_size {8192 << default_window_scaling}; // use of timestamps option static constexpr bool default_timestamps {true}; diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index aa56bd988d..26a8b308bd 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -323,10 +323,10 @@ Packet_view_ptr Connection::create_outgoing_packet() packet->set_source(local_); // Set Destination (remote) packet->set_destination(remote_); + uint32_t shifted = std::min((cb.RCV.WND >> cb.RCV.wind_shift), default_window_size); + Ensures(shifted <= 0xffff); - const auto recv_wnd = recv_wnd_getter(); - //printf("recv_wnd %u\n", recv_wnd); - packet->set_win(cb.RCV.WND >> cb.RCV.wind_shift); + packet->set_win(shifted); if(cb.SND.TS_OK) packet->add_tcp_option_aligned(host_.get_ts_value(), cb.get_ts_recent()); @@ -691,8 +691,9 @@ void Connection::recv_data(const Packet_view& in) // since it could be that we already preallocated that memory in our vector. // i also think we shouldn't reach this point due to State::check_seq checking // if we're inside the window. if packet is out of order tho we can change the RCV wnd (i think). 
- if(recv_wnd_getter() == 0) + if(recv_wnd_getter() == 0) { drop(in, Drop_reason::RCV_WND_ZERO); + } // Keep track if a packet is being sent during the async read callback diff --git a/src/net/tcp/tcp.cpp b/src/net/tcp/tcp.cpp index 2ccf8055f0..ca80d2abb4 100644 --- a/src/net/tcp/tcp.cpp +++ b/src/net/tcp/tcp.cpp @@ -30,6 +30,8 @@ #include #include #include +#include +#include using namespace std; using namespace net; @@ -375,11 +377,19 @@ void TCP::reset_pmtu(Socket dest, IP4::PMTU pmtu) { uint32_t TCP::global_recv_wnd() { - // 80% of free mem - // normalize to 0 to avoid negative value (???) - ssize_t avail = std::max((static_cast(OS::heap_avail()) * 80 / 100) / 2, 0); - //printf("heap: %zi avail: %zu\n", (ssize_t)OS::heap_avail(), avail); - return std::min(avail, (1 << 30)); // max can only be 1GB + using namespace util; + + auto max_use = OS::heap_max() / 4; // TODO: make proportion into variable + auto in_use = OS::heap_usage(); + + if (in_use >= max_use) { + printf("global_recv_wnd: Receive window empty. 
Heap use: %zu \n", in_use); + return 0; + } + + ssize_t buf_avail = max_use - in_use; + + return std::min(buf_avail, 4_MiB); } void TCP::transmit(tcp::Packet_view_ptr packet) @@ -615,6 +625,3 @@ TCP::Listeners::const_iterator TCP::cfind_listener(const Socket& socket) const return it; } - - - From 55ecd1310ec9d2fe20ed914323dc07908449a1a4 Mon Sep 17 00:00:00 2001 From: fwsGonzo Date: Wed, 28 Nov 2018 13:04:44 +0100 Subject: [PATCH 07/93] tcp: Assert on size to avoid overshooting buffer --- src/net/tcp/write_queue.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/net/tcp/write_queue.cpp b/src/net/tcp/write_queue.cpp index c40c20249c..0c866de7f4 100644 --- a/src/net/tcp/write_queue.cpp +++ b/src/net/tcp/write_queue.cpp @@ -31,6 +31,7 @@ void Write_queue::advance(size_t bytes) auto& buf = nxt(); offset_ += bytes; + assert(offset_ <= buf->size()); debug2(" Advance: bytes=%u off=%u rem=%u\n", bytes, offset_, (buf->size() - offset_)); From ac3c07f8a4221e1a3afb462c1ef28a06fb072bce Mon Sep 17 00:00:00 2001 From: fwsGonzo Date: Wed, 28 Nov 2018 13:05:48 +0100 Subject: [PATCH 08/93] stream: Add transport_bottom() to get the outermost stream --- api/net/stream.hpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/api/net/stream.hpp b/api/net/stream.hpp index ab471c10bf..c4f99b441e 100644 --- a/api/net/stream.hpp +++ b/api/net/stream.hpp @@ -181,10 +181,23 @@ namespace net { **/ virtual Stream* transport() noexcept = 0; + /** Recursively navigate to the transport stream at the bottom **/ + inline Stream* bottom_transport() noexcept; + virtual size_t serialize_to(void*) const = 0; virtual ~Stream() = default; }; // < class Stream + + inline Stream* Stream::bottom_transport() noexcept + { + Stream* stream = this; + while (stream->transport() != nullptr) { + stream = stream->transport(); + } + return stream; + } + } // < namespace net #endif // < NET_STREAM_HPP From d3d20c75c69bd60b8725c275d737db573a1cd5af Mon Sep 17 00:00:00 2001 From: fwsGonzo Date: Wed, 28 
Nov 2018 13:06:29 +0100 Subject: [PATCH 09/93] Connect TCP recv window handlers between in/out on sessions --- lib/microLB/micro_lb/balancer.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index 1d42fc3f3f..1c6ba22e16 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -411,6 +411,29 @@ namespace microLB [&nodes = n, idx] () { nodes.close_session(idx); }); + + // get the actual TCP connections + auto conn_in = dynamic_cast(incoming->bottom_transport())->tcp(); + assert(conn_in != nullptr); + auto conn_out = dynamic_cast(outgoing->bottom_transport())->tcp(); + assert(conn_out != nullptr); + + static const uint32_t sendq_max = 0x400000; + // set recv window handlers + conn_in->set_recv_wnd_getter( + [conn_out] () -> uint32_t { + auto sendq_size = conn_out->sendq_size(); + if (sendq_size == 0) + printf("WARNING: Incoming reports sendq size: %u\n", sendq_size); + return sendq_max - sendq_size; + }); + conn_out->set_recv_wnd_getter( + [conn_in] () -> uint32_t { + auto sendq_size = conn_in->sendq_size(); + if (sendq_size == 0) + printf("WARNING: Outgoing reports sendq size: %u\n", sendq_size); + return sendq_max - sendq_size; + }); } bool Session::is_alive() const { return incoming != nullptr; From c63ed1434fddd094a211688e0d7160820c885c8e Mon Sep 17 00:00:00 2001 From: fwsGonzo Date: Wed, 28 Nov 2018 13:55:29 +0100 Subject: [PATCH 10/93] openssl: More feedback on failing certs/keys --- src/net/openssl/client.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/net/openssl/client.cpp b/src/net/openssl/client.cpp index 30e15cff8b..c561b428e5 100644 --- a/src/net/openssl/client.cpp +++ b/src/net/openssl/client.cpp @@ -30,7 +30,7 @@ tls_load_from_memory(X509_STORE* store, auto* cert = PEM_read_bio_X509(cbio, NULL, 0, NULL); assert(cert != NULL && "Invalid certificate"); int res = X509_STORE_add_cert(store, cert); 
- assert(res == 1); + assert(res == 1 && "The X509 store did not accept the certificate"); BIO_free(cbio); } @@ -44,7 +44,8 @@ tls_private_key_for_ctx(SSL_CTX* ctx, int bits = 2048) int ret = RSA_generate_key_ex(rsa, bits, bne, NULL); assert(ret == 1); - SSL_CTX_use_RSAPrivateKey(ctx, rsa); + ret = SSL_CTX_use_RSAPrivateKey(ctx, rsa); + assert(ret == 1 && "OpenSSL context did not accept the private key"); } static SSL_CTX* From 5dedaac4060eb518adbe17657694d56ac3273a27 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Thu, 29 Nov 2018 14:54:23 +0100 Subject: [PATCH 11/93] microlb: change send queue warning from full to empty --- lib/microLB/micro_lb/balancer.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index 1c6ba22e16..4a456275b8 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -411,26 +411,27 @@ namespace microLB [&nodes = n, idx] () { nodes.close_session(idx); }); - + // get the actual TCP connections auto conn_in = dynamic_cast(incoming->bottom_transport())->tcp(); assert(conn_in != nullptr); auto conn_out = dynamic_cast(outgoing->bottom_transport())->tcp(); assert(conn_out != nullptr); - + static const uint32_t sendq_max = 0x400000; // set recv window handlers conn_in->set_recv_wnd_getter( [conn_out] () -> uint32_t { auto sendq_size = conn_out->sendq_size(); - if (sendq_size == 0) + + if (sendq_size > sendq_max) printf("WARNING: Incoming reports sendq size: %u\n", sendq_size); return sendq_max - sendq_size; }); conn_out->set_recv_wnd_getter( [conn_in] () -> uint32_t { auto sendq_size = conn_in->sendq_size(); - if (sendq_size == 0) + if (sendq_size > sendq_max) printf("WARNING: Outgoing reports sendq size: %u\n", sendq_size); return sendq_max - sendq_size; }); From db8f4d64545784307b3e4069b56d33c7dfe4b8ce Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Fri, 30 Nov 2018 01:47:45 +0100 Subject: [PATCH 12/93] 
util: added std::pmr resource pool --- api/util/alloc_pmr.hpp | 110 +++++++++++++ api/util/detail/alloc_pmr.hpp | 259 ++++++++++++++++++++++++++++++ src/util/pmr_default.cpp | 9 ++ test/CMakeLists.txt | 2 + test/util/unit/pmr_alloc_test.cpp | 219 +++++++++++++++++++++++++ 5 files changed, 599 insertions(+) create mode 100644 api/util/alloc_pmr.hpp create mode 100644 api/util/detail/alloc_pmr.hpp create mode 100644 src/util/pmr_default.cpp create mode 100644 test/util/unit/pmr_alloc_test.cpp diff --git a/api/util/alloc_pmr.hpp b/api/util/alloc_pmr.hpp new file mode 100644 index 0000000000..0987526e26 --- /dev/null +++ b/api/util/alloc_pmr.hpp @@ -0,0 +1,110 @@ +// This file is a part of the IncludeOS unikernel - www.includeos.org +// +// Copyright 2018 IncludeOS AS, Oslo, Norway +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef UTIL_ALLOC_PMR +#define UTIL_ALLOC_PMR + +#if __has_include() +#include +#include +namespace std { + namespace pmr = std::experimental::pmr; +} +#else +#include +#include // For pmr::vector +#endif + + +#include +#include + +namespace os::mem::detail { + class Pmr_pool; + using Pool_ptr = std::shared_ptr; +} + +namespace os::mem { + + class Pmr_resource; + + class Pmr_pool { + public: + static constexpr size_t default_max_resources = 64; + using Resource = Pmr_resource; + using Resource_ptr = std::unique_ptr>; + + inline + Pmr_pool(size_t capacity, + size_t cap_suballoc = 0, + size_t max_rescount = default_max_resources); + inline Resource_ptr get_resource(); + inline void return_resource(Resource* res); + inline std::size_t resource_capacity(); + inline std::size_t resource_count(); + inline std::size_t total_capacity(); + inline void set_resource_capacity(std::size_t); + inline void set_total_capacity(std::size_t); + inline std::size_t bytes_used(); + inline std::size_t bytes_free(); + inline bool empty(); + private: + detail::Pool_ptr impl; + }; + + + class Pmr_resource : public std::pmr::memory_resource { + public: + using Pool_ptr = detail::Pool_ptr; + inline Pmr_resource(Pool_ptr p); + inline Pool_ptr pool(); + inline void* do_allocate(std::size_t size, std::size_t align) override; + inline void do_deallocate (void* ptr, size_t, size_t) override; + inline bool do_is_equal(const std::pmr::memory_resource&) const noexcept override; + inline std::size_t capacity(); + inline std::size_t bytes_free(); + inline std::size_t bytes_used(); + inline std::size_t allocations(); + inline std::size_t deallocations(); + inline bool empty(); + private: + Pool_ptr pool_; + std::size_t used = 0; + std::size_t allocs = 0; + std::size_t deallocs = 0; + }; + + struct Default_pmr : public std::pmr::memory_resource { + void* do_allocate(std::size_t size, std::size_t align) override { + return memalign(align, size); + } + + void do_deallocate (void* ptr, size_t, size_t) 
override { + std::free(ptr); + } + + bool do_is_equal (const std::pmr::memory_resource& other) const noexcept override { + if (const auto* underlying = dynamic_cast(&other)) + return true; + return false; + } + }; + +} + +#include + +#endif diff --git a/api/util/detail/alloc_pmr.hpp b/api/util/detail/alloc_pmr.hpp new file mode 100644 index 0000000000..18f1183a91 --- /dev/null +++ b/api/util/detail/alloc_pmr.hpp @@ -0,0 +1,259 @@ +// This file is a part of the IncludeOS unikernel - www.includeos.org +// +// Copyright 2018 IncludeOS AS, Oslo, Norway +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef UTIL_DETAIL_ALLOC_PMR +#define UTIL_DETAIL_ALLOC_PMR + +#include +#include +#include +#include + +namespace os::mem::detail { + + class Pmr_pool : public std::enable_shared_from_this, public std::pmr::memory_resource { + public: + using Resource = os::mem::Pmr_pool::Resource; + using Resource_ptr = os::mem::Pmr_pool::Resource_ptr; + Pmr_pool(std::size_t total_max, std::size_t suballoc_max, std::size_t max_allocs) + : cap_total_{total_max}, cap_suballoc_{suballoc_max}, max_resources_{max_allocs} {} + + void* do_allocate(size_t size, size_t align) override { + if (UNLIKELY(size + allocated_ > cap_total_)) { + throw std::bad_alloc(); + } + + // Adapt to memalign's minimum size- and alignemnt requiremnets + if (align < sizeof(void*)) + align = sizeof(void*); + + if (size < sizeof(void*)) + size = sizeof(void*); + + void* buf = memalign(align, size); + + if (buf == nullptr) { + throw std::bad_alloc(); + } + + allocated_ += size; + return buf; + } + + void do_deallocate (void* ptr, size_t size, size_t) override { + + // Adapt to memalign + if (size < sizeof(void*)) + size = sizeof(void*); + + free(ptr); + allocated_ -= size; + } + + bool do_is_equal(const std::pmr::memory_resource&) const noexcept override { + return true; + } + + Resource_ptr resource_from_raw(Resource* raw) { + Resource_ptr res_ptr(raw, [this](auto* res) { + Expects(res->pool().get() == this); + this->return_resource(res); + }); + + return res_ptr; + } + + Resource_ptr get_resource() { + + if (! 
free_resources_.empty()) { + auto res = std::move(free_resources_.back()); + free_resources_.pop_back(); + return res; + } + + if (used_resources_ >= max_resources_) + return nullptr; + + auto res = resource_from_raw(new Pmr_resource(shared_ptr())); + + used_resources_++; + + Ensures(res != nullptr); + Ensures(used_resources_ <= max_resources_); + return res; + } + + void return_resource(Resource* raw) { + auto res_ptr = resource_from_raw(raw); + used_resources_--; + free_resources_.emplace_back(std::move(res_ptr)); + } + + std::shared_ptr shared_ptr() { + return shared_from_this(); + } + + std::size_t resource_count() { + return used_resources_ + free_resources_.size(); + } + + std::size_t free_resources() { + return free_resources_.size(); + } + + std::size_t used_resources() { + return used_resources_; + } + + std::size_t total_capacity() { + return cap_total_; + } + + void set_resource_capacity(std::size_t sz) { + cap_suballoc_ = sz; + } + + void set_total_capacity(std::size_t sz) { + cap_total_ = sz; + } + + std::size_t resource_capacity() { + if (cap_suballoc_ == 0) { + if (used_resources_ == 0) + return cap_total_; + return cap_total_ / used_resources_; + } + return cap_suballoc_; + } + + std::size_t bytes_booked() { + return cap_suballoc_ * used_resources_; + } + + std::size_t bytes_bookable() { + auto booked = bytes_booked(); + if (booked > cap_total_) + return 0; + + return cap_total_ - booked; + } + + + std::size_t bytes_used() { + return allocated_; + } + + bool empty() { + return allocated_ >= cap_total_; + } + + std::size_t bytes_free() { + auto allocd = bytes_used(); + if (allocd > cap_total_) + return 0; + return cap_total_ - allocd; + } + + // NOTE: This can cause leaks or other chaos if you're not sure what you're doing + void clear_free_resources() { + for (auto& res : free_resources_) { + *res = Resource(shared_ptr()); + } + } + + private: + std::size_t allocated_ = 0; + std::size_t cap_total_ = 0; + std::size_t cap_suballoc_ = 0; + std::size_t 
max_resources_ = 0; + std::size_t used_resources_ = 0; + std::deque free_resources_{}; + }; + +} // os::mem::detail + + +namespace os::mem { + + // + // Pmr_pool implementatino (PIMPL wrapper) + // + std::size_t Pmr_pool::total_capacity() { return impl->total_capacity(); } + std::size_t Pmr_pool::resource_capacity() { return impl->resource_capacity(); } + std::size_t Pmr_pool::bytes_free() { return impl->bytes_free(); } + std::size_t Pmr_pool::bytes_used() { return impl->bytes_used(); } + void Pmr_pool::set_resource_capacity(std::size_t s) { impl->set_resource_capacity(s); } + void Pmr_pool::set_total_capacity(std::size_t s) { impl->set_total_capacity(s); }; + + Pmr_pool::Pmr_pool(size_t sz, size_t sz_sub, size_t max_allocs) + : impl{std::make_shared(sz, sz_sub, max_allocs)}{} + Pmr_pool::Resource_ptr Pmr_pool::get_resource() { return impl->get_resource(); } + std::size_t Pmr_pool::resource_count() { return impl->resource_count(); } + void Pmr_pool::return_resource(Resource* res) { impl->return_resource(res); } + bool Pmr_pool::empty() { return impl->empty(); } + + // + // Pmr_resource implementation + // + Pmr_resource::Pmr_resource(Pool_ptr p) : pool_{p} {} + std::size_t Pmr_resource::capacity() { return pool_->resource_capacity(); } + std::size_t Pmr_resource::bytes_free() { + auto cap = capacity(); + if (used > capacity()) + return 0; + return cap - used; + } + std::size_t Pmr_resource::bytes_used() { + return used; + } + + Pmr_resource::Pool_ptr Pmr_resource::pool() { + return pool_; + } + + void* Pmr_resource::do_allocate(std::size_t size, std::size_t align) { + auto cap = capacity(); + if (UNLIKELY(size + used > cap)) { + throw std::bad_alloc(); + } + + void* buf = pool_->allocate(size, align); + + used += size; + allocs++; + + return buf; + } + + void Pmr_resource::do_deallocate(void* ptr, std::size_t s, std::size_t a) { + deallocs++; + pool_->deallocate(ptr,s,a); + used -= s; + } + + bool Pmr_resource::do_is_equal(const std::pmr::memory_resource& other) 
const noexcept { + if (const auto* other_ptr = dynamic_cast(&other)) { + return pool_ == other_ptr->pool_; + } + return false; + } + + bool Pmr_resource::empty() { + return used >= capacity(); + } +} + +#endif diff --git a/src/util/pmr_default.cpp b/src/util/pmr_default.cpp new file mode 100644 index 0000000000..cc7fc7f4e4 --- /dev/null +++ b/src/util/pmr_default.cpp @@ -0,0 +1,9 @@ + +#include + +std::pmr::memory_resource* std::pmr::get_default_resource() noexcept { + static os::mem::Default_pmr* default_pmr; + if (default_pmr == nullptr) + default_pmr = new os::mem::Default_pmr{}; + return default_pmr; +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ed4ad14396..75f593b346 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -158,6 +158,7 @@ set(TEST_SOURCES ${TEST}/util/unit/bitops.cpp ${TEST}/util/unit/lstack.cpp ${TEST}/util/unit/buddy_alloc_test.cpp + ${TEST}/util/unit/pmr_alloc_test.cpp ) set(OS_SOURCES @@ -241,6 +242,7 @@ set(OS_SOURCES ${SRC}/util/syslogd.cpp ${SRC}/util/tar.cpp ${SRC}/util/uri.cpp + # ${SRC}/util/pmr_default.cpp # <- older libc++ versions might need this ${SRC}/virtio/virtio_queue.cpp ) diff --git a/test/util/unit/pmr_alloc_test.cpp b/test/util/unit/pmr_alloc_test.cpp new file mode 100644 index 0000000000..44284693e0 --- /dev/null +++ b/test/util/unit/pmr_alloc_test.cpp @@ -0,0 +1,219 @@ +// This file is a part of the IncludeOS unikernel - www.includeos.org +// +// Copyright 2018 IncludeOS AS, Oslo, Norway +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// #define DEBUG_UNIT + +#include +#include +#include + +CASE("os::mem::default_pmr_resource") { + + std::pmr::vector numbers; + for (int i = 0; i < 1000; i++) { + numbers.push_back(i * 42); + } + + EXPECT(numbers.size() == 1000); +} + + +CASE("os::mem::Pmr_pool usage") { + using namespace util; + constexpr auto pool_cap = 40_MiB; + + // Using default suballoc capacity, which is pool_cap / allocator count + os::mem::Pmr_pool pool{pool_cap}; + + + EXPECT(pool.total_capacity() == pool_cap); + auto res = pool.get_resource(); + EXPECT(res->capacity() == pool_cap); + + auto sub_free = res->bytes_free(); + + std::pmr::polymorphic_allocator alloc{res.get()}; + std::pmr::vector numbers{alloc}; + + EXPECT(numbers.capacity() < 1000); + numbers.reserve(1000); + EXPECT(numbers.capacity() == 1000); + + for (auto i : test::random) + numbers.push_back(i); + + for (auto i = 0; i < numbers.size(); i++) + EXPECT(numbers.at(i) == test::random.at(i)); + + EXPECT(res->bytes_free() <= sub_free - 1000); + + + // Using small res capacity + constexpr auto alloc_cap = 4_KiB; + + os::mem::Pmr_pool pool2{pool_cap, alloc_cap}; + EXPECT(pool2.total_capacity() == pool_cap); + EXPECT(pool2.bytes_free() == pool_cap); + + auto res2 = pool2.get_resource(); + EXPECT(res2->capacity() == alloc_cap); + + auto sub_free2 = res2->bytes_free(); + + std::pmr::polymorphic_allocator alloc2{res2.get()}; + std::pmr::vector numbers2{alloc2}; + + EXPECT(numbers2.capacity() < 1000); + numbers2.reserve(1000); + EXPECT(numbers2.capacity() == 1000); + + + EXPECT(res2->bytes_free() <= sub_free2 - 1000); + EXPECT_THROWS(numbers2.reserve(8_KiB)); + numbers2.clear(); + numbers2.shrink_to_fit(); + + EXPECT(res2->bytes_free() == alloc_cap); + EXPECT(res2->bytes_used() == 0); + EXPECT(pool2.bytes_free() == pool_cap); + EXPECT(pool2.bytes_used() == 0); + + numbers2.push_back(1); + EXPECT(numbers2.capacity() > 0); + 
EXPECT(numbers2.capacity() < 1000); + EXPECT(res2->bytes_free() < alloc_cap); + EXPECT(res2->bytes_free() > alloc_cap - 1000); + +} + + +CASE("os::mem::Pmr_suballoc usage") { + using namespace util; + + constexpr auto pool_cap = 400_KiB; + constexpr auto resource_cap = 4_KiB; + constexpr auto resource_count = 100; + + os::mem::Pmr_pool pool{pool_cap, resource_cap, resource_count}; + + // Get resources enought to saturate pool. + std::vector resources; + + // Get first resource for comparing it's pool ptr. + auto res1 = pool.get_resource(); + EXPECT(res1 != nullptr); + auto pool_ptr = res1->pool(); + + resources.emplace_back(std::move(res1)); + + // Resources are created on-demand, up to max + for (int i = 0; i < resource_count - 1; i++) { + EXPECT(pool.resource_count() == i + 1); + auto res = pool.get_resource(); + EXPECT(res != nullptr); + EXPECT(res->capacity() == resource_cap); + EXPECT(res->bytes_used() == 0); + EXPECT(res->pool() == pool_ptr); + resources.emplace_back(std::move(res)); + } + + // You now can't get more resources + auto unavail = pool.get_resource(); + EXPECT(unavail == nullptr); + + EXPECT(resources.size() == resource_count); + EXPECT(pool.resource_count() == resource_count); + EXPECT(pool_ptr->free_resources() == 0); + EXPECT(pool_ptr->used_resources() == resource_count); + + std::vector allocations{}; + + // Drain all the resources + for (auto& res : resources) { + auto* p1 = res->allocate(1_KiB); + auto* p2 = res->allocate(1_KiB); + auto* p3 = res->allocate(1_KiB); + auto* p4 = res->allocate(1_KiB); + EXPECT(p1 != nullptr); + EXPECT(p2 != nullptr); + EXPECT(p3 != nullptr); + EXPECT(p4 != nullptr); + + allocations.push_back(p1); + allocations.push_back(p2); + allocations.push_back(p3); + allocations.push_back(p4); + + EXPECT(res->empty()); + EXPECT_THROWS(res->allocate(1_KiB)); + EXPECT(res->empty()); + } + + // The pool is now also empty + EXPECT(pool.empty()); + + // So resources can't allocate more from it. 
+ // (Pools pmr api is hidden behind implementation for now.) + EXPECT_THROWS(pool_ptr->allocate(1_KiB)); + + int i = 0; + // Return all resources + for (auto& res : resources) { + EXPECT(pool_ptr->free_resources() == i++); + res.reset(); + } + + for (auto& res : resources) { + EXPECT(res == nullptr); + } + + // The resource count remains constant, but all free no used. + EXPECT(pool.resource_count() == resource_count); + EXPECT(pool_ptr->free_resources() == resource_count); + EXPECT(pool_ptr->used_resources() == 0); + + // Pool is still empty - deallocations haven't happened + EXPECT(pool.empty()); + EXPECT(pool.bytes_free() == 0); + + // NOTE: it's currently possible to deallocate directly to the detail::Pool_ptr + // this is an implementation detail prone to change as each allocator's state + // won't e updated accordingly. + for (auto* alloc : allocations) + pool_ptr->deallocate(alloc, 1_KiB); + + EXPECT(not pool.empty()); + EXPECT(pool.bytes_free() == pool_cap); + + // Each resource's state is remembered as it's passed back and forth. 
+ // ...There's now no way of fetching any resources + auto res_tricked = pool.get_resource(); + EXPECT(res_tricked->empty()); + EXPECT(res_tricked->bytes_free() == 0); + EXPECT_THROWS(res_tricked->allocate(1_KiB)); + + res_tricked.reset(); + pool_ptr->clear_free_resources(); + + auto res2 = pool.get_resource(); + EXPECT(not res2->empty()); + EXPECT(res2->bytes_free() == resource_cap); + auto ptr = res2->allocate(1_KiB); + EXPECT(ptr != nullptr); + res2->deallocate(ptr, 1_KiB); + EXPECT(res2->bytes_free() == resource_cap); + +} From 75450c8ba27e996db9e75c9384c7cd3941bfc59a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Fri, 30 Nov 2018 11:49:39 +0100 Subject: [PATCH 13/93] mem: Refactor OS to use pmr vector as buffer_t for most facilities --- api/fs/common.hpp | 5 +++-- api/hw/block_device.hpp | 5 +++-- api/net/stream.hpp | 5 +++-- api/net/tcp/common.hpp | 8 +++++--- api/pmr | 16 ++++++++++++++++ api/util/alloc_buddy.hpp | 9 +-------- lib/LiveUpdate/serialize_tcp.cpp | 2 +- src/drivers/ide.cpp | 2 +- src/net/https/botan_server.cpp | 2 +- src/posix/udp_fd.cpp | 2 +- 10 files changed, 35 insertions(+), 21 deletions(-) create mode 100644 api/pmr diff --git a/api/fs/common.hpp b/api/fs/common.hpp index e2ecaa25fc..a6c8cd4616 100644 --- a/api/fs/common.hpp +++ b/api/fs/common.hpp @@ -21,6 +21,7 @@ #include #include +#include #include #include "path.hpp" #include @@ -35,12 +36,12 @@ namespace fs { /** * @brief Shared vector used as a buffer within the filesystem subsystem */ - using buffer_t = std::shared_ptr>; + using buffer_t = os::mem::buf_ptr; /** Construct a shared vector **/ template buffer_t construct_buffer(Args&&... 
args) { - return std::make_shared> (std::forward (args)...); + return std::make_shared (std::forward (args)...); } /** Container types **/ diff --git a/api/hw/block_device.hpp b/api/hw/block_device.hpp index 4de2732c1c..a59f1e63bd 100644 --- a/api/hw/block_device.hpp +++ b/api/hw/block_device.hpp @@ -22,6 +22,7 @@ #include #include #include +#include #include namespace hw { @@ -32,7 +33,7 @@ namespace hw { class Block_device { public: using block_t = uint64_t; - using buffer_t = std::shared_ptr>; + using buffer_t = os::mem::buf_ptr; using on_read_func = delegate; using on_write_func = delegate; @@ -149,7 +150,7 @@ class Block_device { * This functionality is not enabled by default, nor always supported **/ virtual void write(block_t blk, buffer_t, on_write_func) = 0; - + virtual bool write_sync(block_t blk, buffer_t) = 0; /** diff --git a/api/net/stream.hpp b/api/net/stream.hpp index c4f99b441e..9eec91b4c5 100644 --- a/api/net/stream.hpp +++ b/api/net/stream.hpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -34,13 +35,13 @@ namespace net { */ class Stream { public: - using buffer_t = std::shared_ptr>; + using buffer_t = os::mem::buf_ptr; using ptr = Stream_ptr; /** Construct a shared vector used by streams **/ template static buffer_t construct_buffer(Args&&... args) { - return std::make_shared> (std::forward (args)...); + return std::make_shared (std::forward (args)...); } /** Called when the stream is ready to be used. 
*/ diff --git a/api/net/tcp/common.hpp b/api/net/tcp/common.hpp index 690d16c4ff..f78dca8615 100644 --- a/api/net/tcp/common.hpp +++ b/api/net/tcp/common.hpp @@ -22,8 +22,10 @@ #include #include #include -#include + #include +#include +#include namespace net { namespace tcp { @@ -62,12 +64,12 @@ namespace net { using seq_t = uint32_t; /** A shared buffer pointer */ - using buffer_t = std::shared_ptr>; + using buffer_t = os::mem::buf_ptr; /** Construct a shared vector used in TCP **/ template buffer_t construct_buffer(Args&&... args) { - return std::make_shared> (std::forward (args)...); + return std::make_shared (std::forward (args)...); } class Connection; diff --git a/api/pmr b/api/pmr new file mode 100644 index 0000000000..6d0025725b --- /dev/null +++ b/api/pmr @@ -0,0 +1,16 @@ +#pragma once + +#if __has_include() +#include +#else +#include +#include +namespace std { + namespace pmr = std::experimental::pmr; +} +#endif + +namespace os::mem { + using buffer = std::pmr::vector; + using buf_ptr = std::shared_ptr; +} diff --git a/api/util/alloc_buddy.hpp b/api/util/alloc_buddy.hpp index e50a0cd0b0..73e3ab0c7c 100644 --- a/api/util/alloc_buddy.hpp +++ b/api/util/alloc_buddy.hpp @@ -21,14 +21,7 @@ #include #include #include -#if __has_include() -#include -#else -#include -namespace std::pmr { - using memory_resource = std::experimental::pmr::memory_resource; -} -#endif +#include #include #include diff --git a/lib/LiveUpdate/serialize_tcp.cpp b/lib/LiveUpdate/serialize_tcp.cpp index f7b4e8a892..2aac13bdad 100644 --- a/lib/LiveUpdate/serialize_tcp.cpp +++ b/lib/LiveUpdate/serialize_tcp.cpp @@ -113,7 +113,7 @@ int Write_queue::deserialize_from(void* addr) len += sizeof(write_buffer); // insert shared buffer into write queue - this->q.emplace_back(std::make_shared>()); + this->q.emplace_back(net::tcp::construct_buffer()); // copy data auto wbuf = this->q.back(); diff --git a/src/drivers/ide.cpp b/src/drivers/ide.cpp index c54ac20f74..d572591a8c 100644 --- 
a/src/drivers/ide.cpp +++ b/src/drivers/ide.cpp @@ -98,7 +98,7 @@ struct workq_item workq_item(uint8_t id, block_t blk, uint32_t cnt, on_read_func call) : drive_id(id), read(true), sector(blk), total(cnt), readcall(std::move(call)) { - buffer = std::make_shared> (total * IDE::SECTOR_SIZE); + buffer = std::make_shared (total * IDE::SECTOR_SIZE); } #endif #ifdef IDE_ENABLE_WRITE diff --git a/src/net/https/botan_server.cpp b/src/net/https/botan_server.cpp index 2ed628bc6a..d620e05314 100644 --- a/src/net/https/botan_server.cpp +++ b/src/net/https/botan_server.cpp @@ -59,7 +59,7 @@ namespace http server_name, get_rng(), std::move(ca_key), - Botan::X509_Certificate(*ca_cert.get()), + Botan::X509_Certificate(ca_cert.data(), ca_cert.size()), std::move(srv_key)); this->credman.reset(credman); diff --git a/src/posix/udp_fd.cpp b/src/posix/udp_fd.cpp index dda5882432..f1c9789cc1 100644 --- a/src/posix/udp_fd.cpp +++ b/src/posix/udp_fd.cpp @@ -41,7 +41,7 @@ void UDP_FD::recv_to_buffer(net::UDPSocket::addr_t addr, // only recv to buffer if not full if(buffer_.size() < max_buffer_msgs()) { // copy data into to-be Message buffer - auto buff = net::tcp::buffer_t(new std::vector (buf, buf + len)); + auto buff = net::tcp::construct_buffer(buf, buf + len); // emplace the message in buffer buffer_.emplace_back(htonl(addr.v4().whole), htons(port), std::move(buff)); } From fcf763777cad70ba9d71fce2b4c18892053b28f1 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Fri, 30 Nov 2018 12:54:16 +0100 Subject: [PATCH 14/93] pmr: refactor full / empty + minor changes --- api/util/alloc_pmr.hpp | 24 ++++++----- api/util/detail/alloc_pmr.hpp | 64 ++++++++++++++++++++---------- test/util/unit/pmr_alloc_test.cpp | 66 +++++++++++++++++-------------- 3 files changed, 94 insertions(+), 60 deletions(-) diff --git a/api/util/alloc_pmr.hpp b/api/util/alloc_pmr.hpp index 0987526e26..5cbac6dfc6 100644 --- a/api/util/alloc_pmr.hpp +++ b/api/util/alloc_pmr.hpp @@ -47,10 +47,9 @@ namespace os::mem { using 
Resource = Pmr_resource; using Resource_ptr = std::unique_ptr>; - inline - Pmr_pool(size_t capacity, - size_t cap_suballoc = 0, - size_t max_rescount = default_max_resources); + inline Pmr_pool(size_t capacity, + size_t cap_suballoc = 0, + size_t max_rescount = default_max_resources); inline Resource_ptr get_resource(); inline void return_resource(Resource* res); inline std::size_t resource_capacity(); @@ -58,9 +57,13 @@ namespace os::mem { inline std::size_t total_capacity(); inline void set_resource_capacity(std::size_t); inline void set_total_capacity(std::size_t); - inline std::size_t bytes_used(); - inline std::size_t bytes_free(); + inline std::size_t allocated(); + inline std::size_t allocatable(); + inline std::size_t alloc_count(); + inline std::size_t dealloc_count(); + inline bool full(); inline bool empty(); + private: detail::Pool_ptr impl; }; @@ -75,10 +78,11 @@ namespace os::mem { inline void do_deallocate (void* ptr, size_t, size_t) override; inline bool do_is_equal(const std::pmr::memory_resource&) const noexcept override; inline std::size_t capacity(); - inline std::size_t bytes_free(); - inline std::size_t bytes_used(); - inline std::size_t allocations(); - inline std::size_t deallocations(); + inline std::size_t allocatable(); + inline std::size_t allocated(); + inline std::size_t alloc_count(); + inline std::size_t dealloc_count(); + inline bool full(); inline bool empty(); private: Pool_ptr pool_; diff --git a/api/util/detail/alloc_pmr.hpp b/api/util/detail/alloc_pmr.hpp index 18f1183a91..5bd60e4836 100644 --- a/api/util/detail/alloc_pmr.hpp +++ b/api/util/detail/alloc_pmr.hpp @@ -76,27 +76,39 @@ namespace os::mem::detail { return res_ptr; } + Resource_ptr new_resource() { + Expects(used_resources_ < max_resources_); + + auto res = resource_from_raw(new Pmr_resource(shared_ptr())); + used_resources_++; + + Ensures(res != nullptr); + Ensures(used_resources_ <= max_resources_); + return res; + } + Resource_ptr get_resource() { if (! 
free_resources_.empty()) { - auto res = std::move(free_resources_.back()); - free_resources_.pop_back(); + auto res = std::move(free_resources_.front()); + free_resources_.pop_front(); + + if (UNLIKELY(! res->empty() and used_resources_ < max_resources_)) { + free_resources_.emplace_back(std::move(res)); + return new_resource(); + } + return res; } if (used_resources_ >= max_resources_) return nullptr; - auto res = resource_from_raw(new Pmr_resource(shared_ptr())); - - used_resources_++; - - Ensures(res != nullptr); - Ensures(used_resources_ <= max_resources_); - return res; + return new_resource(); } void return_resource(Resource* raw) { + Expects(used_resources_ > 0); auto res_ptr = resource_from_raw(raw); used_resources_--; free_resources_.emplace_back(std::move(res_ptr)); @@ -152,16 +164,20 @@ namespace os::mem::detail { } - std::size_t bytes_used() { + std::size_t allocated() { return allocated_; } - bool empty() { + bool full() { return allocated_ >= cap_total_; } - std::size_t bytes_free() { - auto allocd = bytes_used(); + bool empty() { + return allocated_ == 0; + } + + std::size_t allocatable() { + auto allocd = allocated(); if (allocd > cap_total_) return 0; return cap_total_ - allocd; @@ -193,8 +209,9 @@ namespace os::mem { // std::size_t Pmr_pool::total_capacity() { return impl->total_capacity(); } std::size_t Pmr_pool::resource_capacity() { return impl->resource_capacity(); } - std::size_t Pmr_pool::bytes_free() { return impl->bytes_free(); } - std::size_t Pmr_pool::bytes_used() { return impl->bytes_used(); } + std::size_t Pmr_pool::allocatable() { return impl->allocatable(); } + std::size_t Pmr_pool::allocated() { return impl->allocated(); } + void Pmr_pool::set_resource_capacity(std::size_t s) { impl->set_resource_capacity(s); } void Pmr_pool::set_total_capacity(std::size_t s) { impl->set_total_capacity(s); }; @@ -203,6 +220,7 @@ namespace os::mem { Pmr_pool::Resource_ptr Pmr_pool::get_resource() { return impl->get_resource(); } std::size_t 
Pmr_pool::resource_count() { return impl->resource_count(); } void Pmr_pool::return_resource(Resource* res) { impl->return_resource(res); } + bool Pmr_pool::full() { return impl->full(); } bool Pmr_pool::empty() { return impl->empty(); } // @@ -210,13 +228,14 @@ namespace os::mem { // Pmr_resource::Pmr_resource(Pool_ptr p) : pool_{p} {} std::size_t Pmr_resource::capacity() { return pool_->resource_capacity(); } - std::size_t Pmr_resource::bytes_free() { + std::size_t Pmr_resource::allocatable() { auto cap = capacity(); - if (used > capacity()) + if (used > cap) return 0; return cap - used; } - std::size_t Pmr_resource::bytes_used() { + + std::size_t Pmr_resource::allocated() { return used; } @@ -239,8 +258,9 @@ namespace os::mem { } void Pmr_resource::do_deallocate(void* ptr, std::size_t s, std::size_t a) { - deallocs++; + Expects(s != 0); // POSIX malloc will allow size 0, but return nullptr. pool_->deallocate(ptr,s,a); + deallocs++; used -= s; } @@ -251,9 +271,13 @@ namespace os::mem { return false; } - bool Pmr_resource::empty() { + bool Pmr_resource::full() { return used >= capacity(); } + + bool Pmr_resource::empty() { + return used == 0; + } } #endif diff --git a/test/util/unit/pmr_alloc_test.cpp b/test/util/unit/pmr_alloc_test.cpp index 44284693e0..5c529b9cd1 100644 --- a/test/util/unit/pmr_alloc_test.cpp +++ b/test/util/unit/pmr_alloc_test.cpp @@ -20,7 +20,7 @@ #include #include -CASE("os::mem::default_pmr_resource") { +CASE("pmr::default_pmr_resource") { std::pmr::vector numbers; for (int i = 0; i < 1000; i++) { @@ -31,11 +31,11 @@ CASE("os::mem::default_pmr_resource") { } -CASE("os::mem::Pmr_pool usage") { +CASE("pmr::Pmr_pool usage") { using namespace util; constexpr auto pool_cap = 40_MiB; - // Using default suballoc capacity, which is pool_cap / allocator count + // Using default resource capacity, which is pool_cap / allocator count os::mem::Pmr_pool pool{pool_cap}; @@ -43,7 +43,7 @@ CASE("os::mem::Pmr_pool usage") { auto res = pool.get_resource(); 
EXPECT(res->capacity() == pool_cap); - auto sub_free = res->bytes_free(); + auto sub_free = res->allocatable(); std::pmr::polymorphic_allocator alloc{res.get()}; std::pmr::vector numbers{alloc}; @@ -58,7 +58,7 @@ CASE("os::mem::Pmr_pool usage") { for (auto i = 0; i < numbers.size(); i++) EXPECT(numbers.at(i) == test::random.at(i)); - EXPECT(res->bytes_free() <= sub_free - 1000); + EXPECT(res->allocatable() <= sub_free - 1000); // Using small res capacity @@ -66,12 +66,12 @@ CASE("os::mem::Pmr_pool usage") { os::mem::Pmr_pool pool2{pool_cap, alloc_cap}; EXPECT(pool2.total_capacity() == pool_cap); - EXPECT(pool2.bytes_free() == pool_cap); + EXPECT(pool2.allocatable() == pool_cap); auto res2 = pool2.get_resource(); EXPECT(res2->capacity() == alloc_cap); - auto sub_free2 = res2->bytes_free(); + auto sub_free2 = res2->allocatable(); std::pmr::polymorphic_allocator alloc2{res2.get()}; std::pmr::vector numbers2{alloc2}; @@ -81,26 +81,26 @@ CASE("os::mem::Pmr_pool usage") { EXPECT(numbers2.capacity() == 1000); - EXPECT(res2->bytes_free() <= sub_free2 - 1000); + EXPECT(res2->allocatable() <= sub_free2 - 1000); EXPECT_THROWS(numbers2.reserve(8_KiB)); numbers2.clear(); numbers2.shrink_to_fit(); - EXPECT(res2->bytes_free() == alloc_cap); - EXPECT(res2->bytes_used() == 0); - EXPECT(pool2.bytes_free() == pool_cap); - EXPECT(pool2.bytes_used() == 0); + EXPECT(res2->allocatable() == alloc_cap); + EXPECT(res2->allocated() == 0); + EXPECT(pool2.allocatable() == pool_cap); + EXPECT(pool2.allocated() == 0); numbers2.push_back(1); EXPECT(numbers2.capacity() > 0); EXPECT(numbers2.capacity() < 1000); - EXPECT(res2->bytes_free() < alloc_cap); - EXPECT(res2->bytes_free() > alloc_cap - 1000); + EXPECT(res2->allocatable() < alloc_cap); + EXPECT(res2->allocatable() > alloc_cap - 1000); } -CASE("os::mem::Pmr_suballoc usage") { +CASE("pmr::resource usage") { using namespace util; constexpr auto pool_cap = 400_KiB; @@ -125,7 +125,7 @@ CASE("os::mem::Pmr_suballoc usage") { auto res = 
pool.get_resource(); EXPECT(res != nullptr); EXPECT(res->capacity() == resource_cap); - EXPECT(res->bytes_used() == 0); + EXPECT(res->allocated() == 0); EXPECT(res->pool() == pool_ptr); resources.emplace_back(std::move(res)); } @@ -157,13 +157,13 @@ CASE("os::mem::Pmr_suballoc usage") { allocations.push_back(p3); allocations.push_back(p4); - EXPECT(res->empty()); + EXPECT(res->full()); EXPECT_THROWS(res->allocate(1_KiB)); - EXPECT(res->empty()); + EXPECT(res->full()); } - // The pool is now also empty - EXPECT(pool.empty()); + // The pool is now also full + EXPECT(pool.full()); // So resources can't allocate more from it. // (Pools pmr api is hidden behind implementation for now.) @@ -185,9 +185,9 @@ CASE("os::mem::Pmr_suballoc usage") { EXPECT(pool_ptr->free_resources() == resource_count); EXPECT(pool_ptr->used_resources() == 0); - // Pool is still empty - deallocations haven't happened - EXPECT(pool.empty()); - EXPECT(pool.bytes_free() == 0); + // Pool is still full - deallocations haven't happened + EXPECT(pool.full()); + EXPECT(pool.allocatable() == 0); // NOTE: it's currently possible to deallocate directly to the detail::Pool_ptr // this is an implementation detail prone to change as each allocator's state @@ -195,25 +195,31 @@ CASE("os::mem::Pmr_suballoc usage") { for (auto* alloc : allocations) pool_ptr->deallocate(alloc, 1_KiB); - EXPECT(not pool.empty()); - EXPECT(pool.bytes_free() == pool_cap); + EXPECT(not pool.full()); + EXPECT(pool.allocatable() == pool_cap); // Each resource's state is remembered as it's passed back and forth. 
// ...There's now no way of fetching any resources auto res_tricked = pool.get_resource(); - EXPECT(res_tricked->empty()); - EXPECT(res_tricked->bytes_free() == 0); + EXPECT(pool.resource_count() == resource_count); + //EXPECT(res_tricked->full()); + EXPECT(res_tricked->allocatable() == 0); EXPECT_THROWS(res_tricked->allocate(1_KiB)); res_tricked.reset(); pool_ptr->clear_free_resources(); auto res2 = pool.get_resource(); - EXPECT(not res2->empty()); - EXPECT(res2->bytes_free() == resource_cap); + EXPECT(not res2->full()); + EXPECT(res2->allocatable() == resource_cap); auto ptr = res2->allocate(1_KiB); EXPECT(ptr != nullptr); res2->deallocate(ptr, 1_KiB); - EXPECT(res2->bytes_free() == resource_cap); + EXPECT(res2->allocatable() == resource_cap); + +} + +CASE("pmr::Resource usage") { + using namespace util; } From 0b6fde7207207d9f6a2c4ab4e2e0853a6e7ab267 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Fri, 30 Nov 2018 13:58:58 +0100 Subject: [PATCH 15/93] util: Make it possible to default construct pmr alloc --- api/util/alloc_pmr.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/api/util/alloc_pmr.hpp b/api/util/alloc_pmr.hpp index 5cbac6dfc6..4ce2e9cfca 100644 --- a/api/util/alloc_pmr.hpp +++ b/api/util/alloc_pmr.hpp @@ -50,6 +50,7 @@ namespace os::mem { inline Pmr_pool(size_t capacity, size_t cap_suballoc = 0, size_t max_rescount = default_max_resources); + inline Pmr_pool() = default; inline Resource_ptr get_resource(); inline void return_resource(Resource* res); inline std::size_t resource_capacity(); From 660790e43697bb307d39ff91bd91845608d7eaf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Mon, 3 Dec 2018 10:45:47 +0100 Subject: [PATCH 16/93] tcp: Support custom allocators for read buffers --- api/net/tcp/common.hpp | 5 +++-- api/net/tcp/connection.hpp | 3 +++ api/net/tcp/read_buffer.hpp | 3 +++ api/net/tcp/read_request.hpp | 4 +++- api/net/tcp/tcp.hpp | 19 ++++++++++++++++--- 
lib/LiveUpdate/serialize_tcp.cpp | 2 +- src/CMakeLists.txt | 1 + src/net/tcp/connection.cpp | 3 ++- src/net/tcp/read_buffer.cpp | 13 ++++++++++++- src/net/tcp/read_request.cpp | 9 +++++---- src/net/tcp/tcp.cpp | 4 ++++ 11 files changed, 53 insertions(+), 13 deletions(-) diff --git a/api/net/tcp/common.hpp b/api/net/tcp/common.hpp index f78dca8615..99568969cc 100644 --- a/api/net/tcp/common.hpp +++ b/api/net/tcp/common.hpp @@ -52,8 +52,9 @@ namespace net { static const std::chrono::milliseconds default_dack_timeout {40}; using namespace util::literals; - static constexpr size_t default_min_bufsize {4_KiB}; - static constexpr size_t default_max_bufsize {256_KiB}; + static constexpr size_t default_min_bufsize {4_KiB}; + static constexpr size_t default_max_bufsize {256_KiB}; + static constexpr size_t default_total_bufsize {64_MiB}; using Address = net::Addr; diff --git a/api/net/tcp/connection.hpp b/api/net/tcp/connection.hpp index 0e8e284273..39ab8c9258 100644 --- a/api/net/tcp/connection.hpp +++ b/api/net/tcp/connection.hpp @@ -31,6 +31,8 @@ #include #include +#include + namespace net { // Forward declaration of the TCP object class TCP; @@ -625,6 +627,7 @@ class Connection { /** The given read request */ std::unique_ptr read_request; + os::mem::Pmr_pool::Resource_ptr bufalloc{nullptr}; /** Queue for write requests to process */ Write_queue writeq; diff --git a/api/net/tcp/read_buffer.hpp b/api/net/tcp/read_buffer.hpp index ac4899326b..7b657bee76 100644 --- a/api/net/tcp/read_buffer.hpp +++ b/api/net/tcp/read_buffer.hpp @@ -34,6 +34,7 @@ namespace tcp { */ class Read_buffer { public: + using Alloc = os::mem::buffer::allocator_type; /** * @brief Construct a read buffer. * Min and max need to be power of 2. 
@@ -44,6 +45,8 @@ class Read_buffer { */ Read_buffer(const seq_t start, const size_t min, const size_t max); + Read_buffer(const seq_t start, const size_t min, const size_t max, const Alloc& alloc); + /** * @brief Insert data into the buffer relative to the sequence number. * diff --git a/api/net/tcp/read_request.hpp b/api/net/tcp/read_request.hpp index a7dff15388..efe9e93e02 100644 --- a/api/net/tcp/read_request.hpp +++ b/api/net/tcp/read_request.hpp @@ -31,10 +31,11 @@ class Read_request { using Buffer_ptr = std::unique_ptr; using Buffer_queue = std::deque; using ReadCallback = delegate; + using Alloc = os::mem::buffer::allocator_type; static constexpr size_t buffer_limit = 2; ReadCallback callback; - Read_request(seq_t start, size_t min, size_t max, ReadCallback cb); + Read_request(seq_t start, size_t min, size_t max, ReadCallback cb, Alloc&& alloc = Alloc()); size_t insert(seq_t seq, const uint8_t* data, size_t n, bool psh = false); @@ -57,6 +58,7 @@ class Read_request { private: Buffer_queue buffers; + Alloc alloc; Read_buffer* get_buffer(const seq_t seq); diff --git a/api/net/tcp/tcp.hpp b/api/net/tcp/tcp.hpp index 08f8063542..89d5b0d5b6 100644 --- a/api/net/tcp/tcp.hpp +++ b/api/net/tcp/tcp.hpp @@ -31,6 +31,7 @@ #include #include #include +#include namespace net { @@ -370,6 +371,15 @@ namespace net { uint16_t max_syn_backlog() const { return max_syn_backlog_; } + /** + * @brief Set the maximum allowed memory + * to be used by this TCP. + * + * @param[in] size The limit in bytes + */ + void set_total_bufsize(const size_t size) + { total_bufsize_ = size; } + /** * @brief Sets the minimum buffer size. 
* @@ -532,6 +542,12 @@ namespace net { Listeners listeners_; Connections connections_; + size_t total_bufsize_; + os::mem::Pmr_pool mempool; + + size_t min_bufsize_; + size_t max_bufsize_; + Port_utils& ports_; downstream network_layer_out_; @@ -557,9 +573,6 @@ namespace net { /** Maximum SYN queue backlog */ uint16_t max_syn_backlog_; - size_t min_bufsize_ {tcp::default_min_bufsize}; - size_t max_bufsize_ {tcp::default_max_bufsize}; - /** Stats */ uint64_t* bytes_rx_ = nullptr; uint64_t* bytes_tx_ = nullptr; diff --git a/lib/LiveUpdate/serialize_tcp.cpp b/lib/LiveUpdate/serialize_tcp.cpp index 2aac13bdad..3a4b62964c 100644 --- a/lib/LiveUpdate/serialize_tcp.cpp +++ b/lib/LiveUpdate/serialize_tcp.cpp @@ -179,7 +179,7 @@ void Connection::deserialize_from(void* addr) auto* readq = (read_buffer*) &area->vla[writeq_len]; if (readq->capacity) { - read_request = std::make_unique(readq->seq, readq->capacity, host_.max_bufsize(), nullptr); + read_request = std::make_unique(readq->seq, readq->capacity, host_.max_bufsize(), nullptr, bufalloc.get()); read_request->front().deserialize_from(readq); } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0db8c89958..a74cc1c9d2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -41,6 +41,7 @@ set(OS_OBJECTS util/logger.cpp util/sha1.cpp util/syslog_facility.cpp util/syslogd.cpp util/uri.cpp util/percent_encoding.cpp util/tar.cpp util/path_to_regex.cpp util/config.cpp util/autoconf.cpp util/crc32.cpp + util/pmr_default.cpp crt/c_abi.c crt/ctype_b_loc.c crt/ctype_tolower_loc.c crt/string.c crt/quick_exit.cpp crt/cxx_abi.cpp hw/pci_device.cpp hw/nic.cpp hw/ps2.cpp hw/serial.cpp hw/vga_gfx.cpp diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index 26a8b308bd..d7712d82fe 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -63,7 +63,8 @@ void Connection::_on_read(size_t recv_bufsz, ReadCallback cb) (void) recv_bufsz; if(read_request == nullptr) { - read_request.reset(new 
Read_request(this->cb.RCV.NXT, host_.min_bufsize(), host_.max_bufsize(), cb)); + read_request.reset( + new Read_request(this->cb.RCV.NXT, host_.min_bufsize(), host_.max_bufsize(), cb, bufalloc.get())); } // read request is already set, only reset if new size. else diff --git a/src/net/tcp/read_buffer.cpp b/src/net/tcp/read_buffer.cpp index 265d18624e..5745c969d1 100644 --- a/src/net/tcp/read_buffer.cpp +++ b/src/net/tcp/read_buffer.cpp @@ -31,6 +31,17 @@ Read_buffer::Read_buffer(const seq_t startv, const size_t min, const size_t max) buf->reserve(min); } +Read_buffer::Read_buffer(const seq_t startv, const size_t min, const size_t max, + const Alloc& alloc) + : buf(tcp::construct_buffer(alloc)), + start{startv}, cap{max}, hole{0} +{ + Expects(util::bits::is_pow2(cap)); + Expects(util::bits::is_pow2(min)); + Expects(cap >= min); + buf->reserve(min); +} + size_t Read_buffer::insert(const seq_t seq, const uint8_t* data, size_t len, bool push) { assert(buf != nullptr && "Buffer seems to be stolen, make sure to renew()"); @@ -91,7 +102,7 @@ void Read_buffer::reset_buffer_if_needed() // if the buffer isnt unique, create a new one else { - buf = tcp::construct_buffer(); + buf = tcp::construct_buffer(buf->get_allocator()); } // This case is when we need a small buffer in front of diff --git a/src/net/tcp/read_request.cpp b/src/net/tcp/read_request.cpp index 6003acfa35..1fba23ba71 100644 --- a/src/net/tcp/read_request.cpp +++ b/src/net/tcp/read_request.cpp @@ -20,10 +20,11 @@ namespace net { namespace tcp { - Read_request::Read_request(seq_t start, size_t min, size_t max, ReadCallback cb) - : callback{cb} + Read_request::Read_request(seq_t start, size_t min, size_t max, + ReadCallback cb, Alloc&& alloc) + : callback{cb}, alloc{alloc} { - buffers.push_back(std::make_unique(start, min, max)); + buffers.push_back(std::make_unique(start, min, max, alloc)); } size_t Read_request::insert(seq_t seq, const uint8_t* data, size_t n, bool psh) @@ -124,7 +125,7 @@ namespace tcp { // we 
probably need to create multiple buffers, // ... or just decide we only support gaps of 1 buffer size. buffers.push_back( - std::make_unique(cur_back->end_seq(), cur_back->capacity(), cur_back->capacity())); + std::make_unique(cur_back->end_seq(), cur_back->capacity(), cur_back->capacity(), alloc)); auto& back = buffers.back(); //printf("new buffer added start=%u end=%u, fits(%lu)=%lu\n", diff --git a/src/net/tcp/tcp.cpp b/src/net/tcp/tcp.cpp index ca80d2abb4..1b37c9fd59 100644 --- a/src/net/tcp/tcp.cpp +++ b/src/net/tcp/tcp.cpp @@ -41,6 +41,9 @@ TCP::TCP(IPStack& inet, bool smp_enable) : inet_{inet}, listeners_(), connections_(), + total_bufsize_{default_total_bufsize}, + mempool{total_bufsize_}, + min_bufsize_{default_min_bufsize}, max_bufsize_{default_max_bufsize}, ports_(inet.tcp_ports()), writeq(), max_seg_lifetime_{default_msl}, // 30s @@ -512,6 +515,7 @@ void TCP::add_connection(tcp::Connection_ptr conn) { debug(" Connection added %s \n", conn->to_string().c_str()); conn->_on_cleanup({this, &TCP::close_connection}); + conn->bufalloc = mempool.get_resource(); connections_.emplace(conn->tuple(), conn); } From be81d211aee7c6dcd9817cedd763a7d188f94954 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Mon, 3 Dec 2018 16:26:12 +0100 Subject: [PATCH 17/93] tcp: WIP different approaches for adjusting receive window --- api/net/tcp/tcp.hpp | 5 +++- src/net/tcp/connection.cpp | 59 +++++++++++++++++++++++++++++++++----- src/net/tcp/tcp.cpp | 3 ++ 3 files changed, 59 insertions(+), 8 deletions(-) diff --git a/api/net/tcp/tcp.hpp b/api/net/tcp/tcp.hpp index 89d5b0d5b6..4cd221ee56 100644 --- a/api/net/tcp/tcp.hpp +++ b/api/net/tcp/tcp.hpp @@ -378,7 +378,10 @@ namespace net { * @param[in] size The limit in bytes */ void set_total_bufsize(const size_t size) - { total_bufsize_ = size; } + { + total_bufsize_ = size; + mempool.set_total_capacity(total_bufsize_); + } /** * @brief Sets the minimum buffer size. 
diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index d7712d82fe..45a5f01e17 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -63,6 +63,7 @@ void Connection::_on_read(size_t recv_bufsz, ReadCallback cb) (void) recv_bufsz; if(read_request == nullptr) { + Expects(bufalloc != nullptr); read_request.reset( new Read_request(this->cb.RCV.NXT, host_.min_bufsize(), host_.max_bufsize(), cb, bufalloc.get())); } @@ -687,15 +688,26 @@ void Connection::recv_data(const Packet_view& in) { Expects(in.has_tcp_data()); - // just drop the packet if we don't have a recv wnd. - // this is really awful and probably unnecesseary, - // since it could be that we already preallocated that memory in our vector. - // i also think we shouldn't reach this point due to State::check_seq checking + // just drop the packet if we don't have a recv wnd / buffer available. + // this shouldn't be necessary with well behaved connections. + // I also think we shouldn't reach this point due to State::check_seq checking // if we're inside the window. if packet is out of order tho we can change the RCV wnd (i think). 
- if(recv_wnd_getter() == 0) { + if(UNLIKELY(bufalloc->allocatable() < host_.max_bufsize())) { drop(in, Drop_reason::RCV_WND_ZERO); + return; } + size_t length = in.tcp_data_length(); + + /* + if(UNLIKELY(cb.RCV.WND < length)) + { + printf("DROP: Receive window too small - my window is now: %u \n", cb.RCV.WND); + drop(in, Drop_reason::RCV_WND_ZERO); + return; + } + */ + // Keep track if a packet is being sent during the async read callback const auto snd_nxt = cb.SND.NXT; @@ -703,7 +715,6 @@ void Connection::recv_data(const Packet_view& in) // The packet we expect if(cb.RCV.NXT == in.seq()) { - size_t length = in.tcp_data_length(); // If we had packet loss before (and SACK is on) // we need to clear up among the blocks @@ -734,7 +745,41 @@ void Connection::recv_data(const Packet_view& in) // this ensures that the data we ACK is actually put in our buffer. Ensures(recv == length); // adjust the rcv wnd to (maybe) new value - cb.RCV.WND = recv_wnd_getter(); + + // LET APPLICATION REPORT + // cb.RCV.WND = recv_wnd_getter(); + + // PRECISE REPORTING + /* + const auto& rbuf = read_request->front(); + auto remaining = rbuf.capacity() - rbuf.size(); + auto win = (bufalloc->allocatable() + remaining) - rbuf.capacity(); + //auto max = read_request->front().capacity(); + //win = (win < max) ? (rbuf.capacity() - rbuf.size()) : win - max; + cb.RCV.WND = win; + */ + + // REPORT CHUNKWISE + /* + //auto allocatable = bufalloc->allocatable(); + const auto& rbuf = read_request->front(); + + auto win = cb.RCV.WND; + if (bufalloc->allocatable() < rbuf.capacity()) { + printf("[connection] Allocatable data is less than capacity. Win 0. 
\n"); + win = 0; + } else { + win = bufalloc->allocatable() - rbuf.capacity(); + } + + cb.RCV.WND = win; + */ + + + // REPORT CONSTANT + cb.RCV.WND = bufalloc->allocatable(); + //cb.RCV.WND = 64_MiB; + //cb.RCV.WND = std::max(bufalloc->allocatable(), 4_MiB); } } // Packet out of order diff --git a/src/net/tcp/tcp.cpp b/src/net/tcp/tcp.cpp index 1b37c9fd59..3a4805aa72 100644 --- a/src/net/tcp/tcp.cpp +++ b/src/net/tcp/tcp.cpp @@ -516,6 +516,7 @@ void TCP::add_connection(tcp::Connection_ptr conn) { debug(" Connection added %s \n", conn->to_string().c_str()); conn->_on_cleanup({this, &TCP::close_connection}); conn->bufalloc = mempool.get_resource(); + Expects(conn->bufalloc != nullptr); connections_.emplace(conn->tuple(), conn); } @@ -530,6 +531,8 @@ Connection_ptr TCP::create_connection(Socket local, Socket remote, ConnectCallba ) ).first->second; conn->_on_cleanup({this, &TCP::close_connection}); + conn->bufalloc = mempool.get_resource(); + Expects(conn->bufalloc != nullptr); return conn; } From 6ce443c5f03406c71d1a3aac6a6982fd5f6a0456 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Mon, 3 Dec 2018 17:08:38 +0100 Subject: [PATCH 18/93] PMR: fix an issue with resource counting --- api/util/detail/alloc_pmr.hpp | 44 ++++++++++++++++------------ test/util/unit/pmr_alloc_test.cpp | 48 ++++++++++++++++++++++--------- 2 files changed, 61 insertions(+), 31 deletions(-) diff --git a/api/util/detail/alloc_pmr.hpp b/api/util/detail/alloc_pmr.hpp index 5bd60e4836..272736c478 100644 --- a/api/util/detail/alloc_pmr.hpp +++ b/api/util/detail/alloc_pmr.hpp @@ -63,8 +63,8 @@ namespace os::mem::detail { allocated_ -= size; } - bool do_is_equal(const std::pmr::memory_resource&) const noexcept override { - return true; + bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { + return *this == other; } Resource_ptr resource_from_raw(Resource* raw) { @@ -77,42 +77,48 @@ namespace os::mem::detail { } Resource_ptr new_resource() { - 
Expects(used_resources_ < max_resources_); + Expects(resource_count() < max_resources_); auto res = resource_from_raw(new Pmr_resource(shared_ptr())); used_resources_++; Ensures(res != nullptr); - Ensures(used_resources_ <= max_resources_); + Ensures(resource_count() <= max_resources_); return res; } + Resource_ptr release_front() { + auto released = std::move(free_resources_.front()); + free_resources_.pop_front(); + used_resources_++; + return released; + } + + void return_resource(Resource* raw) { + Expects(used_resources_ > 0); + auto res_ptr = resource_from_raw(raw); + used_resources_--; + free_resources_.emplace_back(std::move(res_ptr)); + } + Resource_ptr get_resource() { if (! free_resources_.empty()) { - auto res = std::move(free_resources_.front()); - free_resources_.pop_front(); - if (UNLIKELY(! res->empty() and used_resources_ < max_resources_)) { - free_resources_.emplace_back(std::move(res)); + auto& res = free_resources_.front(); + + if (UNLIKELY(! res->empty() and resource_count() < max_resources_)) { return new_resource(); } - - return res; + return release_front(); } - if (used_resources_ >= max_resources_) + if (resource_count() >= max_resources_) return nullptr; return new_resource(); } - void return_resource(Resource* raw) { - Expects(used_resources_ > 0); - auto res_ptr = resource_from_raw(raw); - used_resources_--; - free_resources_.emplace_back(std::move(res_ptr)); - } std::shared_ptr shared_ptr() { return shared_from_this(); @@ -246,6 +252,8 @@ namespace os::mem { void* Pmr_resource::do_allocate(std::size_t size, std::size_t align) { auto cap = capacity(); if (UNLIKELY(size + used > cap)) { + printf(" ERROR: Failed to alloc %zu - currently allocated %zu capacity %zu\n", + this, size, used, cap); throw std::bad_alloc(); } @@ -266,7 +274,7 @@ namespace os::mem { bool Pmr_resource::do_is_equal(const std::pmr::memory_resource& other) const noexcept { if (const auto* other_ptr = dynamic_cast(&other)) { - return pool_ == other_ptr->pool_; + 
return this == other_ptr; } return false; } diff --git a/test/util/unit/pmr_alloc_test.cpp b/test/util/unit/pmr_alloc_test.cpp index 5c529b9cd1..8816e62297 100644 --- a/test/util/unit/pmr_alloc_test.cpp +++ b/test/util/unit/pmr_alloc_test.cpp @@ -14,11 +14,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -// #define DEBUG_UNIT +//#define DEBUG_UNIT #include #include #include +#include CASE("pmr::default_pmr_resource") { @@ -48,6 +49,7 @@ CASE("pmr::Pmr_pool usage") { std::pmr::polymorphic_allocator alloc{res.get()}; std::pmr::vector numbers{alloc}; + EXPECT(numbers.capacity() < 1000); numbers.reserve(1000); EXPECT(numbers.capacity() == 1000); @@ -60,6 +62,23 @@ CASE("pmr::Pmr_pool usage") { EXPECT(res->allocatable() <= sub_free - 1000); + // Apparently the same allocator can just be passed to any other container. + std::pmr::map maps{alloc}; + maps[42] = "Allocators are interchangeable"; + EXPECT(maps[42] == "Allocators are interchangeable"); + + // Allocator storage is kept by the container itself. 
+ using my_alloc = std::pmr::polymorphic_allocator; + auto unique_alloc = std::make_unique(res.get()); + + std::pmr::vector my_strings(*unique_alloc); + my_strings.push_back("Hello PMR"); + my_strings.push_back("Hello again PMR"); + + unique_alloc.reset(); + my_strings.push_back("Still works"); + EXPECT(my_strings.back() == "Still works"); + // Using small res capacity constexpr auto alloc_cap = 4_KiB; @@ -96,7 +115,6 @@ CASE("pmr::Pmr_pool usage") { EXPECT(numbers2.capacity() < 1000); EXPECT(res2->allocatable() < alloc_cap); EXPECT(res2->allocatable() > alloc_cap - 1000); - } @@ -139,7 +157,7 @@ CASE("pmr::resource usage") { EXPECT(pool_ptr->free_resources() == 0); EXPECT(pool_ptr->used_resources() == resource_count); - std::vector allocations{}; + std::unordered_map> allocations{}; // Drain all the resources for (auto& res : resources) { @@ -152,10 +170,10 @@ CASE("pmr::resource usage") { EXPECT(p3 != nullptr); EXPECT(p4 != nullptr); - allocations.push_back(p1); - allocations.push_back(p2); - allocations.push_back(p3); - allocations.push_back(p4); + allocations.at(res.get()).push_back(p1); + allocations.at(res.get()).push_back(p2); + allocations.at(res.get()).push_back(p3); + allocations.at(res.get()).push_back(p4); EXPECT(res->full()); EXPECT_THROWS(res->allocate(1_KiB)); @@ -192,34 +210,38 @@ CASE("pmr::resource usage") { // NOTE: it's currently possible to deallocate directly to the detail::Pool_ptr // this is an implementation detail prone to change as each allocator's state // won't e updated accordingly. - for (auto* alloc : allocations) - pool_ptr->deallocate(alloc, 1_KiB); + + for (auto[pool, vec] : allocations) + for (auto alloc : vec) + pool->deallocate(alloc, 1_KiB); EXPECT(not pool.full()); EXPECT(pool.allocatable() == pool_cap); // Each resource's state is remembered as it's passed back and forth. 
- // ...There's now no way of fetching any resources + // ...There's now no way of fetching any non-full resources auto res_tricked = pool.get_resource(); + EXPECT(pool.resource_count() == resource_count); - //EXPECT(res_tricked->full()); + EXPECT(res_tricked->full()); EXPECT(res_tricked->allocatable() == 0); EXPECT_THROWS(res_tricked->allocate(1_KiB)); res_tricked.reset(); + pool_ptr->clear_free_resources(); auto res2 = pool.get_resource(); + EXPECT(not res2->full()); EXPECT(res2->allocatable() == resource_cap); auto ptr = res2->allocate(1_KiB); EXPECT(ptr != nullptr); res2->deallocate(ptr, 1_KiB); EXPECT(res2->allocatable() == resource_cap); - } -CASE("pmr::Resource usage") { +CASE("pmr::Resource performance") { using namespace util; } From 16713856d5f45c320dfdeaa191600ec59fa31df5 Mon Sep 17 00:00:00 2001 From: fwsGonzo Date: Tue, 4 Dec 2018 15:21:40 +0100 Subject: [PATCH 19/93] microLB: Signal home only when something changed --- lib/microLB/micro_lb/balancer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index 4a456275b8..2d200caf63 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -367,12 +367,12 @@ namespace microLB this->pool.push_back(std::make_unique(conn)); // stop any active check this->stop_active_check(); + // signal change in pool + this->pool_signal(); } else { this->restart_active_check(); } - // signal change in pool - this->pool_signal(); }); } net::Stream_ptr Node::get_connection() From bcb6d7328211ec793303aa913dddf88d68172e20 Mon Sep 17 00:00:00 2001 From: fwsGonzo Date: Tue, 4 Dec 2018 15:16:01 +0100 Subject: [PATCH 20/93] microLB: Prune dead clients before connecting out, and show active connection attempts --- lib/microLB/micro_lb/balancer.cpp | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index 
2d200caf63..d7f0840354 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -12,7 +12,7 @@ #define CONNECT_TIMEOUT 10s #define CONNECT_THROW_PERIOD 20s -#define LB_VERBOSE 0 +#define LB_VERBOSE 1 #if LB_VERBOSE #define LBOUT(fmt, ...) printf(fmt, ##__VA_ARGS__) #else @@ -68,6 +68,7 @@ namespace microLB } void Balancer::handle_queue() { + printf("handle_queue\n"); // check waitq while (nodes.pool_size() > 0 && queue.empty() == false) { @@ -95,11 +96,22 @@ namespace microLB } void Balancer::handle_connections() { + printf("handle_connections\n"); // stop any rethrow timer since this is a de-facto retry if (this->throw_retry_timer != Timers::UNUSED_ID) { Timers::stop(this->throw_retry_timer); this->throw_retry_timer = Timers::UNUSED_ID; } + + // prune dead clients because the "number of clients" is being + // used in a calculation right after this to determine how many + // nodes to connect to + auto new_end = std::remove_if(queue.begin(), queue.end(), + [](Waiting& client) { + return client.conn == nullptr || client.conn->is_connected() == false; + }); + queue.erase(new_end, queue.end()); + // calculating number of connection attempts to create int np_connecting = nodes.pool_connecting(); int estimate = queue.size() - (np_connecting + nodes.pool_size()); @@ -129,6 +141,7 @@ namespace microLB : conn(std::move(incoming)), total(0) { assert(this->conn != nullptr); + assert(this->conn->is_connected()); // queue incoming data from clients not yet // assigned to a node this->conn->on_read(READQ_PER_CLIENT, @@ -315,6 +328,11 @@ namespace microLB this->active_timer = Timers::periodic( ACTIVE_INITIAL_PERIOD, ACTIVE_CHECK_PERIOD, {this, &Node::perform_active_check}); + LBOUT("Node %d restarting active check (and is inactive)\n", this->m_idx); + } + else + { + LBOUT("Node %d still trying to connect...\n", this->m_idx); } } } From a6cba3c41b149a1e8c763eb4da9ff1764fbf6990 Mon Sep 17 00:00:00 2001 From: fwsGonzo Date: Tue, 4 Dec 2018 15:24:24 
+0100 Subject: [PATCH 21/93] microLB: Fix some printouts after cherry-pick --- lib/microLB/micro_lb/balancer.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index d7f0840354..390ca45625 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -12,7 +12,7 @@ #define CONNECT_TIMEOUT 10s #define CONNECT_THROW_PERIOD 20s -#define LB_VERBOSE 1 +#define LB_VERBOSE 0 #if LB_VERBOSE #define LBOUT(fmt, ...) printf(fmt, ##__VA_ARGS__) #else @@ -328,11 +328,13 @@ namespace microLB this->active_timer = Timers::periodic( ACTIVE_INITIAL_PERIOD, ACTIVE_CHECK_PERIOD, {this, &Node::perform_active_check}); - LBOUT("Node %d restarting active check (and is inactive)\n", this->m_idx); + LBOUT("Node %s restarting active check (and is inactive)\n", + this->addr.to_string().c_str()); } else { - LBOUT("Node %d still trying to connect...\n", this->m_idx); + LBOUT("Node %s still trying to connect...\n", + this->addr.to_string().c_str()); } } } From d34779c7dac4b42dcadde3ca68a6f9f2a06f4582 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Wed, 5 Dec 2018 09:39:10 +0100 Subject: [PATCH 22/93] tcp: add getter for mempool --- api/net/tcp/tcp.hpp | 8 ++++++-- src/net/tcp/tcp.cpp | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/api/net/tcp/tcp.hpp b/api/net/tcp/tcp.hpp index 4cd221ee56..37ea2a4747 100644 --- a/api/net/tcp/tcp.hpp +++ b/api/net/tcp/tcp.hpp @@ -380,7 +380,11 @@ namespace net { void set_total_bufsize(const size_t size) { total_bufsize_ = size; - mempool.set_total_capacity(total_bufsize_); + mempool_.set_total_capacity(total_bufsize_); + } + + const os::mem::Pmr_pool& mempool() { + return mempool_; } /** @@ -546,7 +550,7 @@ namespace net { Connections connections_; size_t total_bufsize_; - os::mem::Pmr_pool mempool; + os::mem::Pmr_pool mempool_; size_t min_bufsize_; size_t max_bufsize_; diff --git a/src/net/tcp/tcp.cpp 
b/src/net/tcp/tcp.cpp index 3a4805aa72..ba97ca5635 100644 --- a/src/net/tcp/tcp.cpp +++ b/src/net/tcp/tcp.cpp @@ -42,7 +42,7 @@ TCP::TCP(IPStack& inet, bool smp_enable) : listeners_(), connections_(), total_bufsize_{default_total_bufsize}, - mempool{total_bufsize_}, + mempool_{total_bufsize_}, min_bufsize_{default_min_bufsize}, max_bufsize_{default_max_bufsize}, ports_(inet.tcp_ports()), writeq(), @@ -515,7 +515,7 @@ void TCP::add_connection(tcp::Connection_ptr conn) { debug(" Connection added %s \n", conn->to_string().c_str()); conn->_on_cleanup({this, &TCP::close_connection}); - conn->bufalloc = mempool.get_resource(); + conn->bufalloc = mempool_.get_resource(); Expects(conn->bufalloc != nullptr); connections_.emplace(conn->tuple(), conn); } @@ -531,7 +531,7 @@ Connection_ptr TCP::create_connection(Socket local, Socket remote, ConnectCallba ) ).first->second; conn->_on_cleanup({this, &TCP::close_connection}); - conn->bufalloc = mempool.get_resource(); + conn->bufalloc = mempool_.get_resource(); Expects(conn->bufalloc != nullptr); return conn; } From 159869ad5576da9a838d8ffb31f001930c8b4645 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Wed, 5 Dec 2018 09:43:31 +0100 Subject: [PATCH 23/93] tcp: reset recv_wnd_getter with reset_callbacks / cleanup --- src/net/tcp/connection.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index 45a5f01e17..7b151eb259 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -117,7 +117,7 @@ void Connection::reset_callbacks() on_connect_.reset(); writeq.on_write(nullptr); on_close_.reset(); - + recv_wnd_getter.reset(); if(read_request) read_request->callback.reset(); } @@ -1163,6 +1163,7 @@ void Connection::clean_up() { on_connect_.reset(); on_disconnect_.reset(); on_close_.reset(); + recv_wnd_getter.reset(); if(read_request) read_request->callback.reset(); _on_cleanup_.reset(); From bff3d76a98bf3e9a40c2a8c7b4b8f198d253b61e Mon 
Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Wed, 5 Dec 2018 10:11:35 +0100 Subject: [PATCH 24/93] pmr: implement allocations / deallocations counter --- api/util/detail/alloc_pmr.hpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/api/util/detail/alloc_pmr.hpp b/api/util/detail/alloc_pmr.hpp index 272736c478..bb02ec08c9 100644 --- a/api/util/detail/alloc_pmr.hpp +++ b/api/util/detail/alloc_pmr.hpp @@ -50,6 +50,7 @@ namespace os::mem::detail { } allocated_ += size; + allocations_++; return buf; } @@ -61,6 +62,7 @@ namespace os::mem::detail { free(ptr); allocated_ -= size; + deallocations_++; } bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { @@ -174,6 +176,14 @@ namespace os::mem::detail { return allocated_; } + std::size_t alloc_count() { + return allocations_; + } + + std::size_t dealloc_count() { + return deallocations_; + } + bool full() { return allocated_ >= cap_total_; } @@ -198,6 +208,8 @@ namespace os::mem::detail { private: std::size_t allocated_ = 0; + std::size_t allocations_ = 0; + std::size_t deallocations_ = 0; std::size_t cap_total_ = 0; std::size_t cap_suballoc_ = 0; std::size_t max_resources_ = 0; @@ -225,6 +237,8 @@ namespace os::mem { : impl{std::make_shared(sz, sz_sub, max_allocs)}{} Pmr_pool::Resource_ptr Pmr_pool::get_resource() { return impl->get_resource(); } std::size_t Pmr_pool::resource_count() { return impl->resource_count(); } + std::size_t Pmr_pool::alloc_count() { return impl->alloc_count(); } + std::size_t Pmr_pool::dealloc_count() { return impl->dealloc_count(); } void Pmr_pool::return_resource(Resource* res) { impl->return_resource(res); } bool Pmr_pool::full() { return impl->full(); } bool Pmr_pool::empty() { return impl->empty(); } From 9641e467987e8384e8f228614dd9c5f5ba2e11d2 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Wed, 5 Dec 2018 10:12:48 +0100 Subject: [PATCH 25/93] microlb: add on_close for new waiting --- lib/microLB/micro_lb/balancer.cpp | 11 
+++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index 390ca45625..0a20e081c8 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -68,7 +68,6 @@ namespace microLB } void Balancer::handle_queue() { - printf("handle_queue\n"); // check waitq while (nodes.pool_size() > 0 && queue.empty() == false) { @@ -96,7 +95,7 @@ namespace microLB } void Balancer::handle_connections() { - printf("handle_connections\n"); + LBOUT("Handle_connections. %i waiting \n", queue.size()); // stop any rethrow timer since this is a de-facto retry if (this->throw_retry_timer != Timers::UNUSED_ID) { Timers::stop(this->throw_retry_timer); @@ -142,6 +141,14 @@ namespace microLB { assert(this->conn != nullptr); assert(this->conn->is_connected()); + + // Release connection if it closes before it's assigned to a node. + this->conn->on_close([this](){ + if (this->conn != nullptr) + this->conn->reset_callbacks(); + this->conn = nullptr; + }); + // queue incoming data from clients not yet // assigned to a node this->conn->on_read(READQ_PER_CLIENT, From 57d94059c9b8ff0aae8aada9c282621beee6c751 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Wed, 5 Dec 2018 10:34:54 +0100 Subject: [PATCH 26/93] test: update pmr test, allow 4b alignment for test memalign --- test/lest_util/os_mock.cpp | 21 +++++++++++++----- test/util/unit/buddy_alloc_test.cpp | 2 +- test/util/unit/pmr_alloc_test.cpp | 33 +++++++++++++++++------------ 3 files changed, 36 insertions(+), 20 deletions(-) diff --git a/test/lest_util/os_mock.cpp b/test/lest_util/os_mock.cpp index d92fc11216..91a80883c8 100644 --- a/test/lest_util/os_mock.cpp +++ b/test/lest_util/os_mock.cpp @@ -20,14 +20,25 @@ #include #include #include -void* memalign(size_t alignment, size_t size) { +#include + +void* memalign(size_t align, size_t size) { void* ptr {nullptr}; - int res = posix_memalign(&ptr, alignment, size); - Ensures(res 
== 0); + + if (align < sizeof(void*)) + align = sizeof(void*); + if (size < sizeof(void*)) + size = sizeof(void*); + + int res = posix_memalign(&ptr, align, size); + if (res == EINVAL) + printf("Error %i: posix_memalign got invalid alignment param %zu \n", res, align); + if (res == ENOMEM) + printf("Error %i: posix_memalign failed, not enough memory %zu \n", res); return ptr; } -void* aligned_alloc(size_t alignment, size_t size) { - return memalign(alignment, size); +void* aligned_alloc(size_t align, size_t size) { + return memalign(align, size); } #endif diff --git a/test/util/unit/buddy_alloc_test.cpp b/test/util/unit/buddy_alloc_test.cpp index 888d5a80ad..01d45f079d 100644 --- a/test/util/unit/buddy_alloc_test.cpp +++ b/test/util/unit/buddy_alloc_test.cpp @@ -279,7 +279,7 @@ CASE("mem::buddy random chaos with data verification"){ std::vector allocs; for (auto rnd : test::random_1k) { - auto sz = std::max(rnd % alloc.pool_size_ / 1024, alloc.min_size); + auto sz = std::max(rnd % alloc.pool_size_ / 1024, alloc.min_size); EXPECT(sz); if (not alloc.full()) { diff --git a/test/util/unit/pmr_alloc_test.cpp b/test/util/unit/pmr_alloc_test.cpp index 8816e62297..feed8a1644 100644 --- a/test/util/unit/pmr_alloc_test.cpp +++ b/test/util/unit/pmr_alloc_test.cpp @@ -14,11 +14,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-//#define DEBUG_UNIT +#define DEBUG_UNIT #include #include #include + +#if __has_include() +#include +#endif + +#include #include CASE("pmr::default_pmr_resource") { @@ -170,10 +176,10 @@ CASE("pmr::resource usage") { EXPECT(p3 != nullptr); EXPECT(p4 != nullptr); - allocations.at(res.get()).push_back(p1); - allocations.at(res.get()).push_back(p2); - allocations.at(res.get()).push_back(p3); - allocations.at(res.get()).push_back(p4); + allocations[res.get()].push_back(p1); + allocations[res.get()].push_back(p2); + allocations[res.get()].push_back(p3); + allocations[res.get()].push_back(p4); EXPECT(res->full()); EXPECT_THROWS(res->allocate(1_KiB)); @@ -215,21 +221,20 @@ CASE("pmr::resource usage") { for (auto alloc : vec) pool->deallocate(alloc, 1_KiB); + EXPECT(pool.empty()); EXPECT(not pool.full()); EXPECT(pool.allocatable() == pool_cap); - // Each resource's state is remembered as it's passed back and forth. - // ...There's now no way of fetching any non-full resources - auto res_tricked = pool.get_resource(); - EXPECT(pool.resource_count() == resource_count); - EXPECT(res_tricked->full()); - EXPECT(res_tricked->allocatable() == 0); - EXPECT_THROWS(res_tricked->allocate(1_KiB)); + auto res_reused = pool.get_resource(); + EXPECT(pool.resource_count() == resource_count); - res_tricked.reset(); + EXPECT(res_reused->empty()); + EXPECT(res_reused->allocatable() == resource_cap); + EXPECT(pool_ptr->free_resources() == resource_count - 1); + EXPECT(pool_ptr->used_resources() == 1); - pool_ptr->clear_free_resources(); + res_reused.reset(); auto res2 = pool.get_resource(); From 89f4aa475074ee731e4bcf12edff1a6f9fb3dcd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Wed, 5 Dec 2018 11:30:06 +0100 Subject: [PATCH 27/93] tcp: Replace global rcv wnd getter with one based on the connections state --- api/net/tcp/connection.hpp | 2 ++ api/net/tcp/tcp.hpp | 9 ----- src/net/tcp/connection.cpp | 70 ++++++++++++++++++-------------------- src/net/tcp/tcp.cpp | 
17 --------- 4 files changed, 36 insertions(+), 62 deletions(-) diff --git a/api/net/tcp/connection.hpp b/api/net/tcp/connection.hpp index 39ab8c9258..0195ed5ec1 100644 --- a/api/net/tcp/connection.hpp +++ b/api/net/tcp/connection.hpp @@ -861,6 +861,8 @@ class Connection { */ bool handle_ack(const Packet_view&); + uint32_t calculate_rcv_wnd() const; + /** * @brief Receive data from an incoming packet containing data. * diff --git a/api/net/tcp/tcp.hpp b/api/net/tcp/tcp.hpp index 37ea2a4747..0c5c0bbd13 100644 --- a/api/net/tcp/tcp.hpp +++ b/api/net/tcp/tcp.hpp @@ -535,15 +535,6 @@ namespace net { return this->cpu_id; } - /** - * @brief Return a value that's supposed to describe how much - * a connection should announce as it's RCV WND, - * with regards to the whole system. - * - * @return A RCV WND value, maximum 1GB - */ - static uint32_t global_recv_wnd(); - private: IPStack& inet_; Listeners listeners_; diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index 7b151eb259..dd0bd22dfe 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -36,7 +36,7 @@ Connection::Connection(TCP& host, Socket local, Socket remote, ConnectCallback c cb{host_.window_size()}, read_request(nullptr), writeq(), - recv_wnd_getter{TCP::global_recv_wnd}, + recv_wnd_getter{this, &Connection::calculate_rcv_wnd}, on_connect_{std::move(callback)}, on_disconnect_({this, &Connection::default_on_disconnect}), rtx_timer({this, &Connection::rtx_timeout}), @@ -653,6 +653,38 @@ void Connection::rtx_ack(const seq_t ack) { // x-rtx_q.size(), rtx_q.size()); } +uint32_t Connection::calculate_rcv_wnd() const +{ + // PRECISE REPORTING + /* + const auto& rbuf = read_request->front(); + auto remaining = rbuf.capacity() - rbuf.size(); + auto win = (bufalloc->allocatable() + remaining) - rbuf.capacity(); + //auto max = read_request->front().capacity(); + //win = (win < max) ? 
(rbuf.capacity() - rbuf.size()) : win - max; + return win; + */ + + // REPORT CHUNKWISE + /* + //auto allocatable = bufalloc->allocatable(); + const auto& rbuf = read_request->front(); + + auto win = cb.RCV.WND; + if (bufalloc->allocatable() < rbuf.capacity()) { + printf("[connection] Allocatable data is less than capacity. Win 0. \n"); + win = 0; + } else { + win = bufalloc->allocatable() - rbuf.capacity(); + } + + return win; + */ + + // REPORT CONSTANT + return bufalloc->allocatable(); +} + /* 7. Process the segment text @@ -745,41 +777,7 @@ void Connection::recv_data(const Packet_view& in) // this ensures that the data we ACK is actually put in our buffer. Ensures(recv == length); // adjust the rcv wnd to (maybe) new value - - // LET APPLICATION REPORT - // cb.RCV.WND = recv_wnd_getter(); - - // PRECISE REPORTING - /* - const auto& rbuf = read_request->front(); - auto remaining = rbuf.capacity() - rbuf.size(); - auto win = (bufalloc->allocatable() + remaining) - rbuf.capacity(); - //auto max = read_request->front().capacity(); - //win = (win < max) ? (rbuf.capacity() - rbuf.size()) : win - max; - cb.RCV.WND = win; - */ - - // REPORT CHUNKWISE - /* - //auto allocatable = bufalloc->allocatable(); - const auto& rbuf = read_request->front(); - - auto win = cb.RCV.WND; - if (bufalloc->allocatable() < rbuf.capacity()) { - printf("[connection] Allocatable data is less than capacity. Win 0. 
\n"); - win = 0; - } else { - win = bufalloc->allocatable() - rbuf.capacity(); - } - - cb.RCV.WND = win; - */ - - - // REPORT CONSTANT - cb.RCV.WND = bufalloc->allocatable(); - //cb.RCV.WND = 64_MiB; - //cb.RCV.WND = std::max(bufalloc->allocatable(), 4_MiB); + cb.RCV.WND = recv_wnd_getter(); } } // Packet out of order diff --git a/src/net/tcp/tcp.cpp b/src/net/tcp/tcp.cpp index ba97ca5635..737d7400d2 100644 --- a/src/net/tcp/tcp.cpp +++ b/src/net/tcp/tcp.cpp @@ -378,23 +378,6 @@ void TCP::reset_pmtu(Socket dest, IP4::PMTU pmtu) { } } -uint32_t TCP::global_recv_wnd() -{ - using namespace util; - - auto max_use = OS::heap_max() / 4; // TODO: make proportion into variable - auto in_use = OS::heap_usage(); - - if (in_use >= max_use) { - printf("global_recv_wnd: Receive window empty. Heap use: %zu \n", in_use); - return 0; - } - - ssize_t buf_avail = max_use - in_use; - - return std::min(buf_avail, 4_MiB); -} - void TCP::transmit(tcp::Packet_view_ptr packet) { // Generate checksum. From da87ce744884c9e7f0ae2b58b46efdc7913bcfa9 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Wed, 5 Dec 2018 12:34:31 +0100 Subject: [PATCH 28/93] tcp: avoid using delegate by default for receive window calculation --- src/net/tcp/connection.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index dd0bd22dfe..025f4a85f6 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -36,7 +36,7 @@ Connection::Connection(TCP& host, Socket local, Socket remote, ConnectCallback c cb{host_.window_size()}, read_request(nullptr), writeq(), - recv_wnd_getter{this, &Connection::calculate_rcv_wnd}, + recv_wnd_getter{nullptr}, on_connect_{std::move(callback)}, on_disconnect_({this, &Connection::default_on_disconnect}), rtx_timer({this, &Connection::rtx_timeout}), @@ -777,7 +777,7 @@ void Connection::recv_data(const Packet_view& in) // this ensures that the data we ACK is actually put in our buffer. 
Ensures(recv == length); // adjust the rcv wnd to (maybe) new value - cb.RCV.WND = recv_wnd_getter(); + cb.RCV.WND = (recv_wnd_getter == nullptr) ? calculate_rcv_wnd() : recv_wnd_getter(); } } // Packet out of order From cf1cd14aa1efdb7930761fd544f66d9fc047ae5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alf-Andr=C3=A9=20Walla?= Date: Wed, 5 Dec 2018 15:26:00 +0100 Subject: [PATCH 29/93] examples: Add a TCP-to-TCP transfer example --- examples/transfer/CMakeLists.txt | 41 ++++++++++++ examples/transfer/config.json | 11 +++ examples/transfer/linux/CMakeLists.txt | 18 +++++ examples/transfer/send_file.sh | 2 + examples/transfer/server.py | 37 +++++++++++ examples/transfer/service.cpp | 92 ++++++++++++++++++++++++++ examples/transfer/vm.json | 3 + 7 files changed, 204 insertions(+) create mode 100644 examples/transfer/CMakeLists.txt create mode 100644 examples/transfer/config.json create mode 100644 examples/transfer/linux/CMakeLists.txt create mode 100755 examples/transfer/send_file.sh create mode 100755 examples/transfer/server.py create mode 100644 examples/transfer/service.cpp create mode 100644 examples/transfer/vm.json diff --git a/examples/transfer/CMakeLists.txt b/examples/transfer/CMakeLists.txt new file mode 100644 index 0000000000..1e3f8b72d4 --- /dev/null +++ b/examples/transfer/CMakeLists.txt @@ -0,0 +1,41 @@ +cmake_minimum_required(VERSION 2.8.9) + +# IncludeOS install location +if (NOT DEFINED ENV{INCLUDEOS_PREFIX}) + set(ENV{INCLUDEOS_PREFIX} /usr/local) +endif() +include($ENV{INCLUDEOS_PREFIX}/includeos/pre.service.cmake) +project (tcp) + +# Human-readable name of your service +set(SERVICE_NAME "TCP Example Service") + +# Name of your service binary +set(BINARY "tcp_example") + +# Source files to be linked with OS library parts to form bootable image +set(SOURCES + service.cpp # ...add more here + ) + +# To add your own include paths: +# set(LOCAL_INCLUDES ".") + +# Adding memdisk (expects my.disk to exist in current dir): +# set(MEMDISK 
${CMAKE_SOURCE_DIR}/my.disk) + +# DRIVERS / PLUGINS: + +set(DRIVERS + virtionet # Virtio networking + # ... Others from IncludeOS/src/drivers + ) + +set(PLUGINS + # syslogd # Syslog over UDP + # ...others + ) + + +# include service build script +include($ENV{INCLUDEOS_PREFIX}/includeos/post.service.cmake) diff --git a/examples/transfer/config.json b/examples/transfer/config.json new file mode 100644 index 0000000000..26564e1325 --- /dev/null +++ b/examples/transfer/config.json @@ -0,0 +1,11 @@ +{ + "net" : [ + { + "iface": 0, + "config": "static", + "address": "10.0.0.42", + "netmask": "255.255.255.0", + "gateway": "10.0.0.1" + } + ] +} diff --git a/examples/transfer/linux/CMakeLists.txt b/examples/transfer/linux/CMakeLists.txt new file mode 100644 index 0000000000..5a6fca00dc --- /dev/null +++ b/examples/transfer/linux/CMakeLists.txt @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 2.8.9) +if (NOT DEFINED ENV{INCLUDEOS_PREFIX}) + set(ENV{INCLUDEOS_PREFIX} /usr/local) +endif() +project (service C CXX) + +# Human-readable name of your service +set(SERVICE_NAME "TCP Transfer From Linux") + +# Name of your service binary +set(BINARY "tcp_linux") + +# Source files to be linked with OS library parts to form bootable image +set(SOURCES + ../service.cpp + ) + +include($ENV{INCLUDEOS_PREFIX}/includeos/linux.service.cmake) diff --git a/examples/transfer/send_file.sh b/examples/transfer/send_file.sh new file mode 100755 index 0000000000..66fd069b9c --- /dev/null +++ b/examples/transfer/send_file.sh @@ -0,0 +1,2 @@ +#!/bin/bash +dd if=/dev/zero bs=1280 count=1048576 > /dev/tcp/10.0.0.42/81 diff --git a/examples/transfer/server.py b/examples/transfer/server.py new file mode 100755 index 0000000000..8bdc9400aa --- /dev/null +++ b/examples/transfer/server.py @@ -0,0 +1,37 @@ +import socket +import sys + +# Create a TCP/IP socket +sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + +# Bind the socket to the port +server_address = ('10.0.0.1', 1337) +print 'starting up on 
%s port %s' % server_address +sock.bind(server_address) + +# Listen for incoming connections +sock.listen(5) + +while True: + # Wait for a connection + print 'waiting for a connection' + connection, client_address = sock.accept() + + try: + print 'connection from', client_address + bytes = 0 + + while True: + data = connection.recv(8192) + if data: + bytes += len(data) + #print 'received: %d' % len(data) + connection.sendall(data) + else: + print 'received %d bytes' % bytes + print 'closing', client_address + break + + finally: + # Clean up the connection + connection.close() diff --git a/examples/transfer/service.cpp b/examples/transfer/service.cpp new file mode 100644 index 0000000000..0da15c0011 --- /dev/null +++ b/examples/transfer/service.cpp @@ -0,0 +1,92 @@ +// This file is a part of the IncludeOS unikernel - www.includeos.org +// +// Copyright 2015-2016 Oslo and Akershus University College of Applied Sciences +// and Alfred Bratterud +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +/** + * An example to show incoming and outgoing TCP Connections. + * In this example, IncludeOS is listening on port 81. + * + * Data received on port 81 will be redirected to an + * outgoing connection to a (in this case) python server (server.py) + * + * Data received from the python server connection + * will be redirected back to the client. + * + * To try it out, use netcat to connect to this IncludeOS instance.
+**/ + +using Connection_ptr = net::tcp::Connection_ptr; +using Disconnect = net::tcp::Connection::Disconnect; + +// Address to our python server: 10.0.0.1:1337 +// @note: This may have to be modified depending on network and server settings. +net::Socket python_server{ {10,0,0,1} , 1337}; + +void Service::start() +{ +#ifdef USERSPACE_LINUX + extern void create_network_device(int N, const char* route, const char* ip); + create_network_device(0, "10.0.0.0/24", "10.0.0.1"); +#endif + auto& inet = net::Super_stack::get(0); + inet.network_config( + { 10, 0, 0, 42 }, // IP + { 255,255,255, 0 }, // Netmask + { 10, 0, 0, 1 }, // Gateway + { 10, 0, 0, 1 }); // DNS + + // Set up a TCP server on port 81 + auto& server = inet.tcp().listen(81); + printf("Server listening: %s \n", server.local().to_string().c_str()); + + // When someone connects to our server + server.on_connect( + [&inet] (Connection_ptr client) { + printf("Connected [Client]: %s\n", client->to_string().c_str()); + // Make an outgoing connection to our python server + auto outgoing = inet.tcp().connect(python_server); + // When outgoing connection to python server is established + outgoing->on_connect( + [client] (Connection_ptr python) { + if (!python) { + printf("Connection failed!\n"); + return; + } + printf("Connected [Python]: %s\n", python->to_string().c_str()); + + // Setup handlers for when data is received on client and python connection + // When client reads data + client->on_read(1024, [python](auto buf) { + python->write(buf); + }); + + // When client is disconnecting + client->on_disconnect([python](Connection_ptr, Disconnect reason) { + printf("Disconnected [Client]: %s\n", reason.to_string().c_str()); + python->close(); + }); + + // When python is disconnecting + python->on_disconnect([client](Connection_ptr, Disconnect reason) { + printf("Disconnected [Python]: %s\n", reason.to_string().c_str()); + client->close(); + }); + }); // << onConnect (outgoing (python)) + }); // << onConnect (client)
+} diff --git a/examples/transfer/vm.json b/examples/transfer/vm.json new file mode 100644 index 0000000000..7d0b112a2f --- /dev/null +++ b/examples/transfer/vm.json @@ -0,0 +1,3 @@ +{ + "mem" : 128 +} From dfa9c4e89d8aa1b9256ca746886f118f306fae5e Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Thu, 6 Dec 2018 11:58:33 +0100 Subject: [PATCH 30/93] pmr: add on_non_full event to notify when not full anymore --- api/util/alloc_pmr.hpp | 6 ++++ api/util/detail/alloc_pmr.hpp | 9 ++++-- test/util/unit/pmr_alloc_test.cpp | 46 ++++++++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/api/util/alloc_pmr.hpp b/api/util/alloc_pmr.hpp index 4ce2e9cfca..a05f9f6f6a 100644 --- a/api/util/alloc_pmr.hpp +++ b/api/util/alloc_pmr.hpp @@ -73,6 +73,7 @@ namespace os::mem { class Pmr_resource : public std::pmr::memory_resource { public: using Pool_ptr = detail::Pool_ptr; + using Event = delegate; inline Pmr_resource(Pool_ptr p); inline Pool_ptr pool(); inline void* do_allocate(std::size_t size, std::size_t align) override; @@ -85,11 +86,16 @@ namespace os::mem { inline std::size_t dealloc_count(); inline bool full(); inline bool empty(); + + /** Fires when the resource has been full and is not full anymore **/ + void on_non_full(Event e){ non_full = e; } + private: Pool_ptr pool_; std::size_t used = 0; std::size_t allocs = 0; std::size_t deallocs = 0; + Event non_full{}; }; struct Default_pmr : public std::pmr::memory_resource { diff --git a/api/util/detail/alloc_pmr.hpp b/api/util/detail/alloc_pmr.hpp index bb02ec08c9..1e09182553 100644 --- a/api/util/detail/alloc_pmr.hpp +++ b/api/util/detail/alloc_pmr.hpp @@ -266,8 +266,6 @@ namespace os::mem { void* Pmr_resource::do_allocate(std::size_t size, std::size_t align) { auto cap = capacity(); if (UNLIKELY(size + used > cap)) { - printf(" ERROR: Failed to alloc %zu - currently allocated %zu capacity %zu\n", - this, size, used, cap); throw std::bad_alloc(); } @@ -281,9 +279,16 @@ namespace os::mem { 
void Pmr_resource::do_deallocate(void* ptr, std::size_t s, std::size_t a) { Expects(s != 0); // POSIX malloc will allow size 0, but return nullptr. + bool trigger_non_full = UNLIKELY(full() and non_full != nullptr); + pool_->deallocate(ptr,s,a); deallocs++; used -= s; + if (trigger_non_full) { + Ensures(!full()); + Ensures(non_full != nullptr); + non_full(*this); + } } bool Pmr_resource::do_is_equal(const std::pmr::memory_resource& other) const noexcept { diff --git a/test/util/unit/pmr_alloc_test.cpp b/test/util/unit/pmr_alloc_test.cpp index feed8a1644..2bd083fbbc 100644 --- a/test/util/unit/pmr_alloc_test.cpp +++ b/test/util/unit/pmr_alloc_test.cpp @@ -247,6 +247,50 @@ CASE("pmr::resource usage") { } -CASE("pmr::Resource performance") { +CASE("pmr::on_non_full event") { using namespace util; + constexpr auto pool_cap = 400_KiB; + + // Using default resource capacity, which is pool_cap / allocator count + os::mem::Pmr_pool pool{pool_cap}; + auto res = pool.get_resource(); + bool event_fired = false; + + res->on_non_full([&](auto& r){ + EXPECT(&r == res.get()); + EXPECT(not r.full()); + event_fired = true; + }); + + std::pmr::polymorphic_allocator alloc{res.get()}; + std::pmr::vector numbers{alloc}; + auto reserved = pool_cap - 2; + numbers.reserve(reserved); + EXPECT(numbers.capacity() == reserved); + EXPECT(res->allocated() == reserved); + EXPECT(not event_fired); + + numbers.push_back(0); + numbers.push_back(1); + + // In order to shrink, it needs to allocate new space for 2 chars then copy. 
+ numbers.shrink_to_fit(); + EXPECT(res->allocated() < reserved); + EXPECT(event_fired); + event_fired = false; + EXPECT(not event_fired); + + for (int i = 2; i < pool_cap / 2; i++) { + numbers.push_back(i); + } + + EXPECT(not event_fired); + EXPECT(not res->full()); + + // Reduce capacity, making the resource full right now + pool.set_resource_capacity(pool_cap / 3); + numbers.clear(); + numbers.shrink_to_fit(); + EXPECT(event_fired); + } From fc56de04f20fa163d5fd46143704feb5b35883ed Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Thu, 6 Dec 2018 14:24:36 +0100 Subject: [PATCH 31/93] tcp: Fixed SND ack comparison --- src/net/tcp/connection.cpp | 77 ++++++++++++++++++------------- src/net/tcp/connection_states.cpp | 29 ++---------- 2 files changed, 47 insertions(+), 59 deletions(-) diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index 7b151eb259..7f47263905 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -170,6 +170,22 @@ void Connection::offer(size_t& packets) // write until we either cant send more (window closes or no more in queue), // or we're out of packets. 
+ + static bool congested=false; + if (!can_send()) + { + if (sendq_remaining() > 0) + { + printf("Window_blocked %d data %zu empty packets sw=%ld fs=%ld WND=%d\n",sendq_remaining(),packets,(int64_t)send_window(), (int64_t)flight_size(),cb.SND.WND); + congested=true; + } + } + if (can_send() && congested) + { + printf("Window released congestion removed %zu packets\n",packets); + congested=false; + } + while(can_send() and packets) { auto packet = create_outgoing_packet(); @@ -397,49 +413,44 @@ bool Connection::handle_ack(const Packet_view& in) } // < dup ack // new ack - else if(LIKELY(in.ack() >= cb.SND.UNA)) - { - if(is_win_update(in, true_win)) - { - cb.SND.WND = true_win; - cb.SND.WL1 = in.seq(); - cb.SND.WL2 = in.ack(); - //printf(" Window update (%u)\n", cb.SND.WND); - } - //pred_flags = htonl((in.tcp_header_length() << 26) | 0x10 | cb.SND.WND >> cb.SND.wind_shift); - - // [RFC 6582] p. 8 - prev_highest_ack_ = cb.SND.UNA; - highest_ack_ = in.ack(); - if(cb.SND.TS_OK) - { - const auto* ts = in.ts_option(); - if(ts != nullptr) // TODO: not sure the packet is valid if TS missing - last_acked_ts_ = ts->ecr; - } + if(is_win_update(in, true_win)) + { + cb.SND.WND = true_win; + cb.SND.WL1 = in.seq(); + cb.SND.WL2 = in.ack(); + //printf(" Window update (%u)\n", cb.SND.WND); + } + //pred_flags = htonl((in.tcp_header_length() << 26) | 0x10 | cb.SND.WND >> cb.SND.wind_shift); - cb.SND.UNA = in.ack(); + // [RFC 6582] p. 8 + prev_highest_ack_ = cb.SND.UNA; + highest_ack_ = in.ack(); - rtx_ack(in.ack()); + if(cb.SND.TS_OK) + { + const auto* ts = in.ts_option(); + if(ts != nullptr) // TODO: not sure the packet is valid if TS missing + last_acked_ts_ = ts->ecr; + } - take_rtt_measure(in); + cb.SND.UNA = in.ack(); - // do either congctrl or fastrecov according to New Reno - (not fast_recovery_) - ? 
congestion_control(in) : fast_recovery(in); + rtx_ack(in.ack()); - dup_acks_ = 0; + take_rtt_measure(in); - if(in.has_tcp_data() or in.isset(FIN)) - return true; + // do either congctrl or fastrecov according to New Reno + (not fast_recovery_) + ? congestion_control(in) : fast_recovery(in); - } // < new ack + dup_acks_ = 0; - // ACK outside - else { + if(in.has_tcp_data() or in.isset(FIN)) return true; - } + + // < new ack + // Nothing to process return false; } diff --git a/src/net/tcp/connection_states.cpp b/src/net/tcp/connection_states.cpp index 7d477b785b..90c7973a4d 100644 --- a/src/net/tcp/connection_states.cpp +++ b/src/net/tcp/connection_states.cpp @@ -240,17 +240,10 @@ bool Connection::State::check_ack(Connection& tcp, const Packet_view& in) { // Correction: [RFC 1122 p. 94] // ACK is inside sequence space //return tcp.handle_ack(in); - if(in.ack() <= tcb.SND.NXT ) { + + if ( (in.ack()-tcb.SND.UNA) <= (tcb.SND.NXT-tcb.SND.UNA)) { return tcp.handle_ack(in); - // this is a "new" ACK - //if(tcb.SND.UNA <= in->ack()) { - - // this is a NEW ACK - //if(tcb.SND.UNA < in->ack()) - //{ - // tcp.acknowledge(in->ack()); - //} // [RFC 5681] /* DUPLICATE ACKNOWLEDGMENT: @@ -268,23 +261,7 @@ bool Connection::State::check_ack(Connection& tcp, const Packet_view& in) { new data unless the incoming duplicate acknowledgment contains new SACK information. */ - // this is a RFC 5681 DUP ACK - //!in->isset(FIN) and !in->isset(SYN) - //else if(tcp.reno_is_dup_ack(in)) { - // debug2(" Reno Dup ACK %u\n", in->ack()); - // tcp.reno_dup_ack(in->ack()); - //} - // this is an RFC 793 DUP ACK - //else { - //printf(" RFC 793 Dup ACK %u\n", in->ack()); - //} - //} - // this is an "old" ACK out of order - //else { - // printf(" ACK out of order (SND.UNA > ACK)\n"); - //} - // tcp.signal_sent(); - // return that buffer has been SENT - currently no support to receipt sent buffer. 
+ } /* If the ACK acks something not yet sent (SEG.ACK > SND.NXT) then send an ACK, drop the segment, and return. */ else { From c865b20d84b33873dc190602c02fb97bc65ab993 Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Thu, 6 Dec 2018 14:37:12 +0100 Subject: [PATCH 32/93] tcp: removed uneccesary old print --- src/net/tcp/connection.cpp | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index 7f47263905..fa4f3dd918 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -171,21 +171,6 @@ void Connection::offer(size_t& packets) // write until we either cant send more (window closes or no more in queue), // or we're out of packets. - static bool congested=false; - if (!can_send()) - { - if (sendq_remaining() > 0) - { - printf("Window_blocked %d data %zu empty packets sw=%ld fs=%ld WND=%d\n",sendq_remaining(),packets,(int64_t)send_window(), (int64_t)flight_size(),cb.SND.WND); - congested=true; - } - } - if (can_send() && congested) - { - printf("Window released congestion removed %zu packets\n",packets); - congested=false; - } - while(can_send() and packets) { auto packet = create_outgoing_packet(); From 2a5af62846dbb6c9ffec609a1605d418afbd249f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Thu, 6 Dec 2018 16:52:57 +0100 Subject: [PATCH 33/93] tcp: Calc rcv wnd more frequent etc --- api/net/tcp/connection.hpp | 7 ++++++- src/net/tcp/connection.cpp | 24 ++++++++++++++++-------- src/net/tcp/connection_states.cpp | 19 ++++++++++++++++++- 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/api/net/tcp/connection.hpp b/api/net/tcp/connection.hpp index 0195ed5ec1..126e61fa30 100644 --- a/api/net/tcp/connection.hpp +++ b/api/net/tcp/connection.hpp @@ -293,7 +293,7 @@ class Connection { * @return True if able to send, False otherwise. 
*/ bool can_send() const noexcept - { return (usable_window() >= SMSS()) and writeq.has_remaining_requests(); } + { return usable_window() and writeq.has_remaining_requests(); } /** * @brief Return the "tuple" (id) of the connection. @@ -861,6 +861,11 @@ class Connection { */ bool handle_ack(const Packet_view&); + void update_rcv_wnd() { + cb.RCV.WND = (recv_wnd_getter == nullptr) ? + calculate_rcv_wnd() : recv_wnd_getter(); + } + uint32_t calculate_rcv_wnd() const; /** diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index 7cbe60047c..500b2d9ab5 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -438,6 +438,8 @@ bool Connection::handle_ack(const Packet_view& in) rtx_ack(in.ack()); + update_rcv_wnd(); + take_rtt_measure(in); // do either congctrl or fastrecov according to New Reno @@ -667,14 +669,15 @@ void Connection::rtx_ack(const seq_t ack) { uint32_t Connection::calculate_rcv_wnd() const { // PRECISE REPORTING - /* + if(UNLIKELY(read_request == nullptr)) + return 0xffff; const auto& rbuf = read_request->front(); auto remaining = rbuf.capacity() - rbuf.size(); - auto win = (bufalloc->allocatable() + remaining) - rbuf.capacity(); + auto win = (bufalloc->allocatable() - (host_.max_bufsize()*1) + remaining) - rbuf.capacity(); + //auto win = (bufalloc->allocatable() - (host_.max_bufsize())); //auto max = read_request->front().capacity(); //win = (win < max) ? (rbuf.capacity() - rbuf.size()) : win - max; return win; - */ // REPORT CHUNKWISE /* @@ -735,10 +738,10 @@ void Connection::recv_data(const Packet_view& in) // this shouldn't be necessary with well behaved connections. // I also think we shouldn't reach this point due to State::check_seq checking // if we're inside the window. if packet is out of order tho we can change the RCV wnd (i think). 
- if(UNLIKELY(bufalloc->allocatable() < host_.max_bufsize())) { + /*if(UNLIKELY(bufalloc->allocatable() < host_.max_bufsize())) { drop(in, Drop_reason::RCV_WND_ZERO); return; - } + }*/ size_t length = in.tcp_data_length(); @@ -788,7 +791,7 @@ void Connection::recv_data(const Packet_view& in) // this ensures that the data we ACK is actually put in our buffer. Ensures(recv == length); // adjust the rcv wnd to (maybe) new value - cb.RCV.WND = (recv_wnd_getter == nullptr) ? calculate_rcv_wnd() : recv_wnd_getter(); + update_rcv_wnd(); } } // Packet out of order @@ -949,12 +952,16 @@ void Connection::retransmit() { syn_rtx_++; } // If not, check if there is data and retransmit - else if(writeq.size()) { + else if(writeq.size()) + { auto& buf = writeq.una(); + + // TODO: Finish to send window zero probe, but only on rtx timeout + debug2(" With data (wq.sz=%u) buf.unacked=%u\n", writeq.size(), buf.length() - buf.acknowledged); fill_packet(*packet, buf->data() + writeq.acked(), buf->size() - writeq.acked()); - packet->set_flag(PSH); + packet->set_flag(PSH); } rtx_attempt_++; packet->set_seq(cb.SND.UNA); @@ -1342,6 +1349,7 @@ bool Connection::uses_SACK() const noexcept void Connection::drop(const Packet_view& packet, [[maybe_unused]]Drop_reason reason) { + //printf("Drop %s %#.x\n", packet.to_string().c_str(), reason); host_.drop(packet); } diff --git a/src/net/tcp/connection_states.cpp b/src/net/tcp/connection_states.cpp index 90c7973a4d..2703329779 100644 --- a/src/net/tcp/connection_states.cpp +++ b/src/net/tcp/connection_states.cpp @@ -117,7 +117,18 @@ bool Connection::State::check_seq(Connection& tcp, Packet_view& in) debug2(" TCB: %s \n",tcb.to_string().c_str()); // #1 - The packet we expect - if( in.seq() == tcb.RCV.NXT ) { + if( in.seq() == tcb.RCV.NXT ) + { + static uint16_t ack_probe = 0; + if(UNLIKELY(tcb.RCV.WND == 0 and in.tcp_data_length() > ack_probe)) + { + //if(in.tcp_data_length() == 1) + // printf("RCV PROBE %s\n", in.to_string().c_str()); + + 
tcp.update_rcv_wnd(); + goto unacceptable; + } + goto acceptable; } /// if SACK isn't permitted there is no point handling out-of-order packets @@ -130,6 +141,7 @@ bool Connection::State::check_seq(Connection& tcp, Packet_view& in) } // #3 (INVALID) - Packet is outside the right edge of the recv window else if( packet_end > tcb.RCV.NXT+tcb.RCV.WND ) { + //printf("Outside right: %s NXT=%u WND=%u\n", in.to_string().c_str(), tcb.RCV.NXT, tcb.RCV.WND); goto unacceptable; } // #4 - Packet with payload is what we expect or bigger, but inside our window @@ -137,6 +149,11 @@ bool Connection::State::check_seq(Connection& tcp, Packet_view& in) and packet_end < tcb.RCV.NXT+tcb.RCV.WND ) { goto acceptable; } + else + { + //printf("Probably outside on left side %s end=%u NXT=%u WND=%u\n", + // in.to_string().c_str(), packet_end, tcb.RCV.NXT, tcb.RCV.WND); + } /* If an incoming segment is not acceptable, an acknowledgment should be sent in reply (unless the RST bit is set, if so drop From 421d897aa224de610a98aa8f102b029fcadd8316 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Thu, 6 Dec 2018 21:21:19 +0100 Subject: [PATCH 34/93] tcp: WIP move drop on rcv-win closed and allow window updates when closed --- src/net/tcp/connection.cpp | 10 +++++----- src/net/tcp/connection_states.cpp | 10 ---------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index cf1d60eb51..1a3d58e0f5 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -465,7 +465,7 @@ void Connection::congestion_control(const Packet_view& in) } // < congestion avoidance // try to write - if(can_send() and !in.has_tcp_data()) + if(can_send() and (!in.has_tcp_data() or cb.RCV.WND < in.tcp_data_length())) { debug2(" Can send UW: %u SMSS: %u\n", usable_window(), SMSS()); send_much(); @@ -730,14 +730,13 @@ void Connection::recv_data(const Packet_view& in) size_t length = in.tcp_data_length(); - /* if(UNLIKELY(cb.RCV.WND < 
length)) { - printf("DROP: Receive window too small - my window is now: %u \n", cb.RCV.WND); drop(in, Drop_reason::RCV_WND_ZERO); + update_rcv_wnd(); + send_ack(); return; } - */ // Keep track if a packet is being sent during the async read callback @@ -1334,7 +1333,8 @@ bool Connection::uses_SACK() const noexcept void Connection::drop(const Packet_view& packet, [[maybe_unused]]Drop_reason reason) { - //printf("Drop %s %#.x\n", packet.to_string().c_str(), reason); + /*printf("Drop %s %#.x RCV.WND: %u RCV.NXT %u alloc free: %zu flight size: %u SND.WND: %u \n", + packet.to_string().c_str(), reason, cb.RCV.WND, cb.RCV.NXT, bufalloc->allocatable(), flight_size(), cb.SND.WND);*/ host_.drop(packet); } diff --git a/src/net/tcp/connection_states.cpp b/src/net/tcp/connection_states.cpp index 2703329779..49395a4dd2 100644 --- a/src/net/tcp/connection_states.cpp +++ b/src/net/tcp/connection_states.cpp @@ -119,16 +119,6 @@ bool Connection::State::check_seq(Connection& tcp, Packet_view& in) // #1 - The packet we expect if( in.seq() == tcb.RCV.NXT ) { - static uint16_t ack_probe = 0; - if(UNLIKELY(tcb.RCV.WND == 0 and in.tcp_data_length() > ack_probe)) - { - //if(in.tcp_data_length() == 1) - // printf("RCV PROBE %s\n", in.to_string().c_str()); - - tcp.update_rcv_wnd(); - goto unacceptable; - } - goto acceptable; } /// if SACK isn't permitted there is no point handling out-of-order packets From a3093feab2c5735fbf4b8c92945b2e8e83b364ef Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Thu, 6 Dec 2018 21:34:20 +0100 Subject: [PATCH 35/93] test: TCP circle of evil --- test/net/integration/tcp/service.cpp | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/test/net/integration/tcp/service.cpp b/test/net/integration/tcp/service.cpp index 2b7ae102a8..f7f350e59a 100644 --- a/test/net/integration/tcp/service.cpp +++ b/test/net/integration/tcp/service.cpp @@ -24,6 +24,7 @@ using namespace net; using namespace std::chrono; // For timers and MSL 
+using namespace util; // For KiB/MiB/GiB literals tcp::Connection_ptr client; static Inet& stack() @@ -33,7 +34,7 @@ static Inet& stack() TEST VARIABLES */ tcp::port_t -TEST1{8081}, TEST2{8082}, TEST3{8083}, TEST4{8084}, TEST5{8085}; +TEST0{8080},TEST1{8081}, TEST2{8082}, TEST3{8083}, TEST4{8084}, TEST5{8085}; using HostAddress = std::pair; HostAddress @@ -132,6 +133,8 @@ struct Buffer { std::string str() { return {data, size};} }; +size_t recv = 0; +size_t chunks = 0; void Service::start() { #ifdef USERSPACE_LINUX @@ -166,6 +169,9 @@ void Service::start() // reduce test duration tcp.set_MSL(MSL_TEST); + // Modify total buffers assigned to TCP here + tcp.set_total_bufsize(64_MiB); + /* TEST: Send and receive small string. */ @@ -177,6 +183,24 @@ void Service::start() CHECK(tcp.listening_ports() == 0, "No (0) open ports (listening connections)"); CHECK(tcp.active_connections() == 0, "No (0) active connections"); + // Trigger with e.g.: + // dd if=/dev/zero bs=9000 count=1000000 | nc 10.0.0.44 8080 | grep Received -a + tcp.listen(TEST0).on_connect([](tcp::Connection_ptr conn) { + INFO("Test 0", "Circle of Evil"); + conn->on_read(424242, [conn](tcp::buffer_t buffer) { + recv += buffer->size(); + chunks++; + if (chunks % 100 == 0) { + std::string res = std::string("Received ") + util::Byte_r(recv).to_string() + "\n"; + printf("%s", res.c_str()); + auto new_buf = std::make_shared>(res.begin(), res.end()); + conn->write(new_buf); + } + conn->write(buffer); + }); + }); + + tcp.listen(TEST1).on_connect([](tcp::Connection_ptr conn) { INFO("Test 1", "SMALL string (%u)", small.size()); conn->on_read(small.size(), [conn](tcp::buffer_t buffer) { From c6e007919f2cca5709a21a60bafa4f751cfc1398 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Fri, 7 Dec 2018 14:17:28 +0100 Subject: [PATCH 36/93] liveupdate: allocate new memory resource on deserialize connection --- lib/LiveUpdate/serialize_tcp.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/lib/LiveUpdate/serialize_tcp.cpp b/lib/LiveUpdate/serialize_tcp.cpp index 3a4b62964c..ce3e1ace5e 100644 --- a/lib/LiveUpdate/serialize_tcp.cpp +++ b/lib/LiveUpdate/serialize_tcp.cpp @@ -175,6 +175,9 @@ void Connection::deserialize_from(void* addr) slumbering_ip4.insert(&this->host_.stack()); } + // Assign new memory resource from TCP + this->bufalloc = host_.mempool_.get_resource(); + /// restore read queue auto* readq = (read_buffer*) &area->vla[writeq_len]; if (readq->capacity) From fbbc1a16fa4aa2af7de9db3e3a7f669613db0bcb Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Fri, 7 Dec 2018 15:36:41 +0100 Subject: [PATCH 37/93] microlb: give default to new ctor param to preserve API --- lib/microLB/micro_lb/autoconf.cpp | 5 ++--- lib/microLB/micro_lb/balancer.hpp | 6 +++--- lib/microLB/micro_lb/openssl.cpp | 3 +-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/microLB/micro_lb/autoconf.cpp b/lib/microLB/micro_lb/autoconf.cpp index 221b108791..7406b44644 100644 --- a/lib/microLB/micro_lb/autoconf.cpp +++ b/lib/microLB/micro_lb/autoconf.cpp @@ -44,9 +44,8 @@ namespace microLB { assert(clients.HasMember("key") && "TLS-enabled microLB must also have key"); // create TLS over TCP load balancer - balancer = new Balancer(netinc, CLIENT_PORT, netout, use_active_check, - clients["certificate"].GetString(), - clients["key"].GetString()); + balancer = new Balancer(netinc, CLIENT_PORT, netout, clients["certificate"].GetString(), + clients["key"].GetString(), use_active_check); } else { // create TCP load balancer diff --git a/lib/microLB/micro_lb/balancer.hpp b/lib/microLB/micro_lb/balancer.hpp index 41a6cb503a..8fb095b320 100644 --- a/lib/microLB/micro_lb/balancer.hpp +++ b/lib/microLB/micro_lb/balancer.hpp @@ -97,9 +97,9 @@ namespace microLB }; struct Balancer { - Balancer(netstack_t& in, uint16_t port, netstack_t& out, bool do_ac); - Balancer(netstack_t& in, uint16_t port, netstack_t& out, bool do_ac, - const std::string& cert, const std::string& 
key); + Balancer(netstack_t& in, uint16_t port, netstack_t& out, bool do_ac = false); + Balancer(netstack_t& in, uint16_t port, netstack_t& out, + const std::string& cert, const std::string& key, bool do_ac = false); static Balancer* from_config(); int wait_queue() const; diff --git a/lib/microLB/micro_lb/openssl.cpp b/lib/microLB/micro_lb/openssl.cpp index 139f3fada0..455f26d417 100644 --- a/lib/microLB/micro_lb/openssl.cpp +++ b/lib/microLB/micro_lb/openssl.cpp @@ -10,9 +10,8 @@ namespace microLB netstack_t& in, uint16_t port, netstack_t& out, - const bool do_ac, const std::string& tls_cert, - const std::string& tls_key) + const std::string& tls_key, const bool do_ac) : nodes(do_ac), netin(in), netout(out), signal({this, &Balancer::handle_queue}) { fs::memdisk().init_fs( From 57d11b8cb2ab085b3b92a4b10f2f881394b16979 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Fri, 7 Dec 2018 15:44:05 +0100 Subject: [PATCH 38/93] tcp: update rcv win on all dropped packets --- src/net/tcp/connection_states.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/net/tcp/connection_states.cpp b/src/net/tcp/connection_states.cpp index 49395a4dd2..6ec6c14896 100644 --- a/src/net/tcp/connection_states.cpp +++ b/src/net/tcp/connection_states.cpp @@ -156,6 +156,8 @@ bool Connection::State::check_seq(Connection& tcp, Packet_view& in) */ unacceptable: + tcp.update_rcv_wnd(); + if(!in.isset(RST)) tcp.send_ack(); From 26f1b9aa98a6d20cee24f59424409020e2b62d6c Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Fri, 7 Dec 2018 16:26:29 +0100 Subject: [PATCH 39/93] tcp: update receive window on all acks, avoid silly windows --- src/net/tcp/connection.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index 1a3d58e0f5..fa4d31a9a3 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -283,6 +283,7 @@ void Connection::receive_disconnect() { } } + void 
Connection::segment_arrived(Packet_view& incoming) { //const uint32_t FMASK = (~(0x0000000F | htons(0x08))); @@ -320,6 +321,7 @@ int Connection::serialize_to(void*) const { return 0; } Packet_view_ptr Connection::create_outgoing_packet() { + update_rcv_wnd(); auto packet = (is_ipv6_) ? host_.create_outgoing_packet6() : host_.create_outgoing_packet(); // Set Source (local == the current connection) @@ -656,13 +658,14 @@ uint32_t Connection::calculate_rcv_wnd() const // PRECISE REPORTING if(UNLIKELY(read_request == nullptr)) return 0xffff; + const auto& rbuf = read_request->front(); auto remaining = rbuf.capacity() - rbuf.size(); auto win = (bufalloc->allocatable() - (host_.max_bufsize()*1) + remaining) - rbuf.capacity(); //auto win = (bufalloc->allocatable() - (host_.max_bufsize())); //auto max = read_request->front().capacity(); //win = (win < max) ? (rbuf.capacity() - rbuf.size()) : win - max; - return win; + return (win < SMSS()) ? 0 : win; // Avoid small silly windows // REPORT CHUNKWISE /* @@ -680,8 +683,8 @@ uint32_t Connection::calculate_rcv_wnd() const return win; */ - // REPORT CONSTANT - return bufalloc->allocatable(); + // REPORT CHUNKWISE FROM ALLOCATOR + //return bufalloc->allocatable(); } /* @@ -774,8 +777,6 @@ void Connection::recv_data(const Packet_view& in) const auto recv = read_request->insert(in.seq(), in.tcp_data(), length, in.isset(PSH)); // this ensures that the data we ACK is actually put in our buffer. 
Ensures(recv == length); - // adjust the rcv wnd to (maybe) new value - update_rcv_wnd(); } } // Packet out of order @@ -786,6 +787,7 @@ void Connection::recv_data(const Packet_view& in) recv_out_of_order(in); } + // User callback didnt result in transmitting an ACK if(cb.SND.NXT == snd_nxt) ack_data(); @@ -943,7 +945,7 @@ void Connection::retransmit() { // TODO: Finish to send window zero probe, but only on rtx timeout debug2(" With data (wq.sz=%u) buf.unacked=%u\n", - writeq.size(), buf.length() - buf.acknowledged); + writeq.size(), buf->size(), buf->size() - writeq.acked()); fill_packet(*packet, buf->data() + writeq.acked(), buf->size() - writeq.acked()); packet->set_flag(PSH); } From b10f96bebb8e317fafb3fdabd5fa565edc85b2f5 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Fri, 7 Dec 2018 16:27:27 +0100 Subject: [PATCH 40/93] microlb: use TCP's internal receive window calculations --- lib/microLB/micro_lb/balancer.cpp | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index 0a20e081c8..770c5653ad 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -439,29 +439,7 @@ namespace microLB nodes.close_session(idx); }); - // get the actual TCP connections - auto conn_in = dynamic_cast(incoming->bottom_transport())->tcp(); - assert(conn_in != nullptr); - auto conn_out = dynamic_cast(outgoing->bottom_transport())->tcp(); - assert(conn_out != nullptr); - static const uint32_t sendq_max = 0x400000; - // set recv window handlers - conn_in->set_recv_wnd_getter( - [conn_out] () -> uint32_t { - auto sendq_size = conn_out->sendq_size(); - - if (sendq_size > sendq_max) - printf("WARNING: Incoming reports sendq size: %u\n", sendq_size); - return sendq_max - sendq_size; - }); - conn_out->set_recv_wnd_getter( - [conn_in] () -> uint32_t { - auto sendq_size = conn_in->sendq_size(); - if (sendq_size > sendq_max) - printf("WARNING: Outgoing reports sendq 
size: %u\n", sendq_size); - return sendq_max - sendq_size; - }); } bool Session::is_alive() const { return incoming != nullptr; From 2a5396b87eccb06edee619175d8468d29eb338e2 Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Fri, 7 Dec 2018 20:34:38 +0100 Subject: [PATCH 41/93] tcp: SSSE3 checksum algo added alongside AVX need -mssse3 for it to be compiled --- src/net/checksum.cpp | 115 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 4 deletions(-) diff --git a/src/net/checksum.cpp b/src/net/checksum.cpp index e981f001fc..ab7c992d9c 100644 --- a/src/net/checksum.cpp +++ b/src/net/checksum.cpp @@ -16,9 +16,17 @@ // limitations under the License. #include -#include -#include +#include + +#if defined(__AVX2__) + #include +#endif +#elif defined(__SSSE3__) + #include +#endif + #include +#include namespace net { @@ -26,14 +34,113 @@ uint16_t checksum(uint32_t tsum, const void* data, size_t length) noexcept { const char* buffer = (const char*) data; int64_t sum = tsum; + if (UNLIKELY(length == 0)) + return 0xffff; + + if (UNLIKELY(buffer == 0)) + return 0xffff; + + + +#if defined(__SSSE3__) + static __m128i swap16a = _mm_setr_epi16(0x0001, 0xffff, 0x0203, 0xffff, + 0x0405, 0xffff, 0x0607, 0xffff); + static __m128i swap16b = _mm_setr_epi16(0x0809, 0xffff, 0x0a0b, 0xffff, + 0x0c0d, 0xffff, 0x0e0f, 0xffff); + size_t count; + __m128i zero = _mm_setzero_si128(); + __m128i suma=zero; + __m128i sumb=zero; + __m128i oldsum; + + //according to godbolt it's slightly better to count an index than to increment a pointer + for(count = 0; (count+64) < length; count+=64) + { + __m128i dblock1,dblock2; + dblock1 = _mm_loadu_si128((__m128i *) (&buffer[count + 0])); + dblock2 = _mm_loadu_si128((__m128i *) (&buffer[count + 16])); + + suma = _mm_add_epi32(suma,_mm_shuffle_epi8(dblock1,swap16a)); + sumb = _mm_add_epi32(sumb,_mm_shuffle_epi8(dblock1,swap16b)); + suma = _mm_add_epi32(suma,_mm_shuffle_epi8(dblock2,swap16a)); + sumb = 
_mm_add_epi32(sumb,_mm_shuffle_epi8(dblock2,swap16b)); + + dblock1 = _mm_loadu_si128((__m128i *) (&buffer[count + 32])); + dblock2 = _mm_loadu_si128((__m128i *) (&buffer[count + 48])); + + suma = _mm_add_epi32(suma,_mm_shuffle_epi8(dblock1,swap16a)); + sumb = _mm_add_epi32(sumb,_mm_shuffle_epi8(dblock1,swap16b)); + + suma = _mm_add_epi32(suma,_mm_shuffle_epi8(dblock2,swap16a)); + sumb = _mm_add_epi32(sumb,_mm_shuffle_epi8(dblock2,swap16b)); + } + //why are we not doing a 32 ? + while ((count+16) <= length) + { + __m128i dblock; + dblock= _mm_loadu_si128((__m128i *) (&buffer[count])); + suma = _mm_add_epi32(suma,_mm_shuffle_epi8(dblock,swap16a)); + sumb = _mm_add_epi32(sumb,_mm_shuffle_epi8(dblock,swap16b)); + count+=16; + } + + /*alignas(16) this can be unaligned as we most likely are accessing it unaligned anyways*/ + alignas(16) static const uint8_t shift_tab[48]={ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + }; + /* we could extend this to fast convert 4 words to LE from BE */ + alignas(16) static const uint8_t swap32[16]{ + 0x03,0x02,0x01,0x00,0x80,0x80,0x80,0x80, + 0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80 + }; + int rest=16-(length-count); + //this over reads but who cares?! 
+ if (LIKELY(rest != 16)) + { + __m128i dblock; + dblock = _mm_loadu_si128((__m128i *)(&buffer[count])); + //shifting the data up then down gives us leading zeroes clearing out any over read data + //the shift up shuffles the data bytewise into place + dblock = _mm_shuffle_epi8(dblock, _mm_loadu_si128((__m128i *)(&shift_tab[16 - rest]))); + dblock = _mm_shuffle_epi8(dblock, _mm_loadu_si128((__m128i *)(&shift_tab[16 + rest]))); + suma = _mm_add_epi32(suma,_mm_shuffle_epi8(dblock,swap16a)); + sumb = _mm_add_epi32(sumb,_mm_shuffle_epi8(dblock,swap16b)); + } + + suma = _mm_add_epi32(suma, sumb); + //add 0 and 1 to 0 and 2 and 3 to 1 + suma = _mm_hadd_epi32(suma, _mm_setzero_si128()); + //add 0 and 1 to 1 .. + suma = _mm_hadd_epi32(suma, _mm_setzero_si128()); + + oldsum = _mm_shuffle_epi8(_mm_cvtsi32_si128(tsum), _mm_loadu_si128((__m128i *)&swap32[0])); + suma = _mm_add_epi32(suma,oldsum); //adds the old csum to this + //fix endianness + + //extract the 32 bit sum from vector + uint32_t vsum; + vsum=(uint32_t) _mm_cvtsi128_si32(suma); + + //printf("Vsum + swapped(tsum) %08x\n",vsum); + //maybe this only works if tsum is byteswapped ? 
+ while (vsum >>16) + { + vsum=(vsum & 0xFFFF)+(vsum>>16); + } + //always right in this case as it's always little endian x86 + return ~net::ntohs((uint16_t)(vsum)); +#elif defined(__AVX2__) // VEX-align buffer while (((uintptr_t) buffer & 15) && length >= 4) { sum += *(uint32_t*) buffer; length -= 4; buffer += 4; } - -#ifdef __AVX2__ // run 4 32-bit adds in parallell union vec4 { __m256i mm; From 506b3faffd6d4cadef0800188fdf84b868034980 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Tue, 11 Dec 2018 16:31:49 +0100 Subject: [PATCH 42/93] tcp: return early if connection couldn't be established --- src/net/tcp/listener.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/net/tcp/listener.cpp b/src/net/tcp/listener.cpp index 8b6d65dfb3..144301473e 100644 --- a/src/net/tcp/listener.cpp +++ b/src/net/tcp/listener.cpp @@ -134,7 +134,8 @@ void Listener::connected(Connection_ptr conn) { debug(" %s connected\n", conn->to_string().c_str()); remove(conn); Expects(conn->is_connected()); - host_.add_connection(conn); + if (UNLIKELY(! host_.add_connection(conn))) + return; if(on_connect_ != nullptr) on_connect_(conn); From 2f44d2b3faa9797453be5bc641eb146381087ce1 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Tue, 11 Dec 2018 16:17:59 +0100 Subject: [PATCH 43/93] test: add stats printing for lb test --- test/net/integration/microLB/service.cpp | 99 ++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 6 deletions(-) diff --git a/test/net/integration/microLB/service.cpp b/test/net/integration/microLB/service.cpp index 0cc210aad2..8393db812b 100644 --- a/test/net/integration/microLB/service.cpp +++ b/test/net/integration/microLB/service.cpp @@ -15,20 +15,107 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include +#include #include #include -#include +#include +#include #include +#include + +using namespace util; + +microLB::Balancer* balancer = nullptr; + +void print_nic_stats() { + printf("eth0.sendq_max: %zu, eth0.sendq_now: %zu " + "eth0.stat_rx_total_packets: %zu, eth0.stat_tx_total_packets: %zu, " + "eth0.stat_rx_total_bytes: %zu, eth0.stat_tx_total_bytes: %zu, " + "eth0.sendq_dropped: %zu, eth0.rx_refill_dropped: %zu \n", + Statman::get().get_by_name("eth0.sendq_max").get_uint64(), + Statman::get().get_by_name("eth0.sendq_now").get_uint64(), + Statman::get().get_by_name("eth0.stat_rx_total_packets").get_uint64(), + Statman::get().get_by_name("eth0.stat_tx_total_packets").get_uint64(), + Statman::get().get_by_name("eth0.stat_rx_total_bytes").get_uint64(), + Statman::get().get_by_name("eth0.stat_tx_total_bytes").get_uint64(), + Statman::get().get_by_name("eth0.sendq_dropped").get_uint64(), + Statman::get().get_by_name("eth0.rx_refill_dropped").get_uint64() + ); + + printf("eth1.sendq_max: %zu, eth1.sendq_now: %zu " + "eth1.stat_rx_total_packets: %zu, eth1.stat_tx_total_packets: %zu, " + "eth1.stat_rx_total_bytes: %zu, eth1.stat_tx_total_bytes: %zu, " + "eth1.sendq_dropped: %zu, eth1.rx_refill_dropped: %zu \n", + Statman::get().get_by_name("eth1.sendq_max").get_uint64(), + Statman::get().get_by_name("eth1.sendq_now").get_uint64(), + Statman::get().get_by_name("eth1.stat_rx_total_packets").get_uint64(), + Statman::get().get_by_name("eth1.stat_tx_total_packets").get_uint64(), + Statman::get().get_by_name("eth1.stat_rx_total_bytes").get_uint64(), + Statman::get().get_by_name("eth1.stat_tx_total_bytes").get_uint64(), + Statman::get().get_by_name("eth1.sendq_dropped").get_uint64(), + Statman::get().get_by_name("eth1.rx_refill_dropped").get_uint64() + ); + printf("\n\n"); +} + +void print_mempool_stats() { + auto& inet1 = net::Super_stack::get(0); + auto& inet2 = net::Super_stack::get(1); + printf("\n\nHeap used: %s\n", 
util::Byte_r(OS::heap_usage()).to_string().c_str()); + auto pool1 = inet1.tcp().mempool(); + auto pool2 = inet2.tcp().mempool(); + + // Hack to get the implementation details (e.g. the detail::pool ptr) for some stats + auto res1 = pool1.get_resource(); + auto res2 = pool2.get_resource(); + + auto pool_ptr1 = res1->pool(); + auto pool_ptr2 = res2->pool(); + + res1.reset(); + res2.reset(); + + printf("\n*** TCP0 ***\n%s\n pool: %zu / %zu allocs: %zu resources: %zu (used: %zu free: %zu)\n\n", + inet1.tcp().to_string().c_str(), pool1.allocated(), pool1.total_capacity(), pool1.alloc_count(), + pool1.resource_count(), pool_ptr1->used_resources(), + pool_ptr1->free_resources()); + printf("*** TCP1 ***\n%s\npool: %zu / %zu allocs: %zu resources: %zu (used: %zu free: %zu)\n", + inet2.tcp().to_string().c_str(), pool2.allocated(), pool2.total_capacity(), pool2.alloc_count(), + pool2.resource_count(), pool_ptr2->used_resources(), + pool_ptr2->free_resources()); +} + +void print_lb_stats() { + FILLINE('-'); + CENTER("LB-Stats"); + auto& nodes = balancer->nodes; + printf("Wait queue: %i nodes: %zu tot_sess: %i open_sess: %i timeout_sess: %i pool_size: %i \n", + balancer->wait_queue(), nodes.size(), nodes.total_sessions(), nodes.open_sessions(), nodes.timed_out_sessions(), nodes.pool_size()); + printf("\n\n"); +} void Service::start() { - static auto* balancer = microLB::Balancer::from_config(); + balancer = microLB::Balancer::from_config(); printf("MicroLB ready for test\n"); - auto& inet = net::Super_stack::get(0); - inet.tcp().set_MSL(std::chrono::seconds(2)); + auto& inet1 = net::Super_stack::get(0); + auto& inet2 = net::Super_stack::get(1); + //inet.tcp().set_MSL(std::chrono::seconds(2)); + inet1.tcp().set_total_bufsize(256_MiB); + inet2.tcp().set_total_bufsize(256_MiB); - Timers::oneshot(std::chrono::seconds(5), + /*Timers::oneshot(std::chrono::seconds(5), [] (int) { printf("TCP MSL ended (4 seconds)\n"); - }); + });*/ + //StackSampler::begin(); + + Timers::periodic(2s, 
5s, [](auto) { + StackSampler::print(10); + print_nic_stats(); + print_mempool_stats(); + print_lb_stats(); + + }); } From 3c8ec308d0bb40c2393898b37040c11524a63539 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Tue, 11 Dec 2018 16:17:13 +0100 Subject: [PATCH 44/93] pmr: avoid giving whole pool to a single resource --- api/util/alloc_pmr.hpp | 3 ++- api/util/detail/alloc_pmr.hpp | 7 ++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/api/util/alloc_pmr.hpp b/api/util/alloc_pmr.hpp index a05f9f6f6a..d8cec64efa 100644 --- a/api/util/alloc_pmr.hpp +++ b/api/util/alloc_pmr.hpp @@ -43,7 +43,8 @@ namespace os::mem { class Pmr_pool { public: - static constexpr size_t default_max_resources = 64; + static constexpr size_t default_max_resources = 0xffffff; + static constexpr size_t resource_division_offset = 2; using Resource = Pmr_resource; using Resource_ptr = std::unique_ptr>; diff --git a/api/util/detail/alloc_pmr.hpp b/api/util/detail/alloc_pmr.hpp index 1e09182553..f3524a2635 100644 --- a/api/util/detail/alloc_pmr.hpp +++ b/api/util/detail/alloc_pmr.hpp @@ -151,11 +151,8 @@ namespace os::mem::detail { } std::size_t resource_capacity() { - if (cap_suballoc_ == 0) { - if (used_resources_ == 0) - return cap_total_; - return cap_total_ / used_resources_; - } + if (cap_suballoc_ == 0) + return cap_total_ / (used_resources_ + os::mem::Pmr_pool::resource_division_offset); return cap_suballoc_; } From 75497dc5a35238b2c82d7c7de4be0dd6045a23d6 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Tue, 11 Dec 2018 16:16:27 +0100 Subject: [PATCH 45/93] tcp: flush mem on time-wait, abort when no memory etc. 
--- api/net/tcp/connection.hpp | 5 +++++ api/net/tcp/tcp.hpp | 4 +++- src/net/tcp/connection.cpp | 19 +++++++++++-------- src/net/tcp/connection_states.cpp | 3 +++ src/net/tcp/tcp.cpp | 15 ++++++++++++--- 5 files changed, 34 insertions(+), 12 deletions(-) diff --git a/api/net/tcp/connection.hpp b/api/net/tcp/connection.hpp index 126e61fa30..92dffed9d7 100644 --- a/api/net/tcp/connection.hpp +++ b/api/net/tcp/connection.hpp @@ -607,6 +607,11 @@ class Connection { void set_recv_wnd_getter(Recv_window_getter func) { recv_wnd_getter = func; } + void release_memory() { + read_request = nullptr; + bufalloc.reset(); + } + private: /** "Parent" for Connection. */ TCP& host_; diff --git a/api/net/tcp/tcp.hpp b/api/net/tcp/tcp.hpp index 0c5c0bbd13..15845d5a62 100644 --- a/api/net/tcp/tcp.hpp +++ b/api/net/tcp/tcp.hpp @@ -708,8 +708,10 @@ namespace net { * @brief Adds a connection. * * @param[in] A ptr to the Connection + * + * @return True if the connection was added, false if rejected */ - void add_connection(tcp::Connection_ptr); + bool add_connection(tcp::Connection_ptr); /** * @brief Creates a connection. diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index fa4d31a9a3..145c8f61d4 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -661,10 +661,11 @@ uint32_t Connection::calculate_rcv_wnd() const const auto& rbuf = read_request->front(); auto remaining = rbuf.capacity() - rbuf.size(); - auto win = (bufalloc->allocatable() - (host_.max_bufsize()*1) + remaining) - rbuf.capacity(); - //auto win = (bufalloc->allocatable() - (host_.max_bufsize())); - //auto max = read_request->front().capacity(); - //win = (win < max) ? (rbuf.capacity() - rbuf.size()) : win - max; + + auto buf_avail = bufalloc->allocatable() + remaining; + auto reserve = (host_.max_bufsize() * Read_request::buffer_limit); + auto win = buf_avail > reserve ? buf_avail - reserve : 0; + return (win < SMSS()) ? 
0 : win; // Avoid small silly windows // REPORT CHUNKWISE @@ -1155,12 +1156,14 @@ void Connection::clean_up() { if(timewait_dack_timer.is_running()) timewait_dack_timer.stop(); - // necessary to keep the shared_ptr alive during the whole function after _on_cleanup_ is called - // avoids connection being destructed before function is done - auto shared = retrieve_shared(); // clean up all other copies // either in TCP::listeners_ (open) or Listener::syn_queue_ (half-open) - if(_on_cleanup_) _on_cleanup_(shared); + if(_on_cleanup_) { + // necessary to keep the shared_ptr alive during the whole function after _on_cleanup_ is called + // avoids connection being destructed before function is done + auto shared = retrieve_shared(); + _on_cleanup_(shared); + } on_connect_.reset(); on_disconnect_.reset(); diff --git a/src/net/tcp/connection_states.cpp b/src/net/tcp/connection_states.cpp index 6ec6c14896..dce440dec6 100644 --- a/src/net/tcp/connection_states.cpp +++ b/src/net/tcp/connection_states.cpp @@ -1070,6 +1070,7 @@ State::Result Connection::FinWait1::handle(Connection& tcp, Packet_view& in) { if(in.ack() == tcp.tcb().SND.NXT) { // TODO: I guess or FIN is ACK'ed..? tcp.set_state(TimeWait::instance()); + tcp.release_memory(); if(tcp.rtx_timer.is_running()) tcp.rtx_stop(); tcp.timewait_start(); @@ -1117,6 +1118,7 @@ State::Result Connection::FinWait2::handle(Connection& tcp, Packet_view& in) { Start the time-wait timer, turn off the other timers. */ tcp.set_state(Connection::TimeWait::instance()); + tcp.release_memory(); if(tcp.rtx_timer.is_running()) tcp.rtx_stop(); tcp.timewait_start(); @@ -1191,6 +1193,7 @@ State::Result Connection::Closing::handle(Connection& tcp, Packet_view& in) { if(in.ack() == tcp.tcb().SND.NXT) { // TODO: I guess or FIN is ACK'ed..? 
tcp.set_state(TimeWait::instance()); + tcp.release_memory(); tcp.timewait_start(); } diff --git a/src/net/tcp/tcp.cpp b/src/net/tcp/tcp.cpp index 737d7400d2..f8f518a66c 100644 --- a/src/net/tcp/tcp.cpp +++ b/src/net/tcp/tcp.cpp @@ -492,15 +492,24 @@ bool TCP::unbind(const Socket& socket) return false; } -void TCP::add_connection(tcp::Connection_ptr conn) { +bool TCP::add_connection(tcp::Connection_ptr conn) { // Stat increment number of incoming connections (*incoming_connections_)++; debug(" Connection added %s \n", conn->to_string().c_str()); - conn->_on_cleanup({this, &TCP::close_connection}); conn->bufalloc = mempool_.get_resource(); + + // Reject connection if we can't allocate memory + if (conn->bufalloc == nullptr + or conn->bufalloc->allocatable() < max_bufsize() * Read_request::buffer_limit){ + conn->_on_cleanup_ = nullptr; + conn->abort(); + return false; + } + Expects(conn->bufalloc != nullptr); - connections_.emplace(conn->tuple(), conn); + conn->_on_cleanup({this, &TCP::close_connection}); + return connections_.emplace(conn->tuple(), conn).second; } Connection_ptr TCP::create_connection(Socket local, Socket remote, ConnectCallback cb) From 1a788bdd139a6526136de44ce4551b5f41fe14ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Wed, 12 Dec 2018 11:48:34 +0100 Subject: [PATCH 46/93] test: Link with experimental lib when clang apple --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 75f593b346..2aa96aaa84 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -47,7 +47,7 @@ if (APPLE) string(STRIP ${BREW_LLVM} BREW_LLVM) set(BREW_LIBCXX_INC "-L${BREW_LLVM}/lib -I${BREW_LLVM}/include/c++/v1") message(STATUS "Brew libc++ location: " ${BREW_LIBCXX_INC}) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${BREW_LIBCXX_INC} -stdlib=libc++ -nostdinc++ -Wno-unused-command-line-argument") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${BREW_LIBCXX_INC} 
-stdlib=libc++ -nostdinc++ -lc++experimental -Wno-unused-command-line-argument") else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mmacosx-version-min=10.12") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.12") From a8b3475308b61a4be84af2e973bfe02db37b5726 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Thu, 13 Dec 2018 11:25:11 +0100 Subject: [PATCH 47/93] tcp: Only increase rtx attempts on rtx timeout --- src/net/tcp/connection.cpp | 9 +++++---- src/net/tcp/connection_states.cpp | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index 145c8f61d4..22df9e5860 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -370,7 +370,7 @@ void Connection::transmit(Packet_view_ptr packet) { if(packet->isset(ACK)) last_ack_sent_ = cb.RCV.NXT; - //if(packet->has_tcp_data()) printf(" TX %s - NXT:%u\n", packet->to_string().c_str(), cb.SND.NXT); + //printf(" TX %s\n%s\n", packet->to_string().c_str(), to_string().c_str()); host_.transmit(std::move(packet)); } @@ -950,7 +950,6 @@ void Connection::retransmit() { fill_packet(*packet, buf->data() + writeq.acked(), buf->size() - writeq.acked()); packet->set_flag(PSH); } - rtx_attempt_++; packet->set_seq(cb.SND.UNA); /* @@ -1025,13 +1024,15 @@ void Connection::rtx_timeout() { signal_rtx_timeout(); // experimental if(rto_limit_reached()) { - debug(" RTX attempt limit reached, closing.\n"); + debug(" RTX attempt limit reached, closing. 
rtx=%u syn_rtx=%u\n", + rtx_attempt_, syn_rtx_); abort(); return; } // retransmit SND.UNA - retransmit(); // increases rtx_attempt + retransmit(); + rtx_attempt_++; // "back off" timer rttm.RTO *= 2.0; diff --git a/src/net/tcp/connection_states.cpp b/src/net/tcp/connection_states.cpp index dce440dec6..6d10299064 100644 --- a/src/net/tcp/connection_states.cpp +++ b/src/net/tcp/connection_states.cpp @@ -209,8 +209,8 @@ bool Connection::State::check_seq(Connection& tcp, Packet_view& in) void Connection::State::unallowed_syn_reset_connection(Connection& tcp, const Packet_view& in) { assert(in.isset(SYN)); - debug(" Unallowed SYN for STATE: %s, reseting connection.\n", - tcp.state().to_string().c_str()); + debug(" Unallowed SYN for STATE: %s, reseting connection. %s\n", + tcp.state().to_string().c_str(), in.to_string().c_str()); // Not sure if this is the correct way to send a "reset response" auto packet = tcp.outgoing_packet(); packet->set_seq(in.ack()).set_flag(RST); From 729f32b9a3210dcfe2ce25685c57a2067ab96e89 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Thu, 13 Dec 2018 12:25:52 +0100 Subject: [PATCH 48/93] test: update pmr test to reflect division factor --- test/util/unit/pmr_alloc_test.cpp | 35 +++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/test/util/unit/pmr_alloc_test.cpp b/test/util/unit/pmr_alloc_test.cpp index 2bd083fbbc..d9669145d5 100644 --- a/test/util/unit/pmr_alloc_test.cpp +++ b/test/util/unit/pmr_alloc_test.cpp @@ -43,7 +43,7 @@ CASE("pmr::Pmr_pool usage") { constexpr auto pool_cap = 40_MiB; // Using default resource capacity, which is pool_cap / allocator count - os::mem::Pmr_pool pool{pool_cap}; + os::mem::Pmr_pool pool{pool_cap, pool_cap}; EXPECT(pool.total_capacity() == pool_cap); @@ -55,7 +55,6 @@ CASE("pmr::Pmr_pool usage") { std::pmr::polymorphic_allocator alloc{res.get()}; std::pmr::vector numbers{alloc}; - EXPECT(numbers.capacity() < 1000); numbers.reserve(1000); 
EXPECT(numbers.capacity() == 1000); @@ -85,7 +84,6 @@ CASE("pmr::Pmr_pool usage") { my_strings.push_back("Still works"); EXPECT(my_strings.back() == "Still works"); - // Using small res capacity constexpr auto alloc_cap = 4_KiB; @@ -121,6 +119,7 @@ CASE("pmr::Pmr_pool usage") { EXPECT(numbers2.capacity() < 1000); EXPECT(res2->allocatable() < alloc_cap); EXPECT(res2->allocatable() > alloc_cap - 1000); + } @@ -252,7 +251,7 @@ CASE("pmr::on_non_full event") { constexpr auto pool_cap = 400_KiB; // Using default resource capacity, which is pool_cap / allocator count - os::mem::Pmr_pool pool{pool_cap}; + os::mem::Pmr_pool pool{pool_cap, pool_cap}; auto res = pool.get_resource(); bool event_fired = false; @@ -294,3 +293,31 @@ CASE("pmr::on_non_full event") { EXPECT(event_fired); } + + +CASE("pmr::default resource cap") { + // Not providing a resource cap will give each resource a proportion of max + + using namespace util; + constexpr auto pool_cap = 400_KiB; + + // Using default resource capacity, which is pool_cap / allocator count + os::mem::Pmr_pool pool{pool_cap}; + auto res1 = pool.get_resource(); + auto expected = pool_cap / (1 + os::mem::Pmr_pool::resource_division_offset); + EXPECT(res1->allocatable() == expected); + + auto res2 = pool.get_resource(); + expected = pool_cap / (2 + os::mem::Pmr_pool::resource_division_offset); + EXPECT(res2->allocatable() == expected); + + auto res3 = pool.get_resource(); + expected = pool_cap / (3 + os::mem::Pmr_pool::resource_division_offset); + EXPECT(res3->allocatable() == expected); + + auto res4 = pool.get_resource(); + expected = pool_cap / (4 + os::mem::Pmr_pool::resource_division_offset); + EXPECT(res4->allocatable() == expected); + + +} From ca0bd7d7a66932584125d4f31acda248d33ac660 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Thu, 13 Dec 2018 12:56:05 +0100 Subject: [PATCH 49/93] test: add back MSL timeout to microlb test --- test/net/integration/microLB/service.cpp | 15 ++++++++------- 1 file changed, 8 
insertions(+), 7 deletions(-) diff --git a/test/net/integration/microLB/service.cpp b/test/net/integration/microLB/service.cpp index 8393db812b..77a59137b0 100644 --- a/test/net/integration/microLB/service.cpp +++ b/test/net/integration/microLB/service.cpp @@ -101,21 +101,22 @@ void Service::start() printf("MicroLB ready for test\n"); auto& inet1 = net::Super_stack::get(0); auto& inet2 = net::Super_stack::get(1); - //inet.tcp().set_MSL(std::chrono::seconds(2)); - inet1.tcp().set_total_bufsize(256_MiB); - inet2.tcp().set_total_bufsize(256_MiB); + inet1.tcp().set_MSL(std::chrono::seconds(2)); - /*Timers::oneshot(std::chrono::seconds(5), + // Increasing TCP buffer size may increase throughput + //inet1.tcp().set_total_bufsize(256_MiB); + //inet2.tcp().set_total_bufsize(256_MiB); + + Timers::oneshot(std::chrono::seconds(5), [] (int) { printf("TCP MSL ended (4 seconds)\n"); - });*/ + }); //StackSampler::begin(); Timers::periodic(2s, 5s, [](auto) { - StackSampler::print(10); + //StackSampler::print(10); print_nic_stats(); print_mempool_stats(); print_lb_stats(); - }); } From acbe9091644fcfb9d744f20d110869a13cbefa96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Thu, 13 Dec 2018 14:20:58 +0100 Subject: [PATCH 50/93] tcp: shared ptr cleanup magic --- src/net/tcp/connection.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index 22df9e5860..12283b2ab9 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -1157,12 +1157,13 @@ void Connection::clean_up() { if(timewait_dack_timer.is_running()) timewait_dack_timer.stop(); + // necessary to keep the shared_ptr alive during the whole function after _on_cleanup_ is called + // avoids connection being destructed before function is done + Connection_ptr shared; // clean up all other copies // either in TCP::listeners_ (open) or Listener::syn_queue_ (half-open) if(_on_cleanup_) { - // necessary to keep 
the shared_ptr alive during the whole function after _on_cleanup_ is called - // avoids connection being destructed before function is done - auto shared = retrieve_shared(); + shared = retrieve_shared(); _on_cleanup_(shared); } @@ -1174,7 +1175,7 @@ void Connection::clean_up() { read_request->callback.reset(); _on_cleanup_.reset(); - debug(" Succesfully cleaned up %s\n", to_string().c_str()); + debug2(" Succesfully cleaned up %s\n", to_string().c_str()); } std::string Connection::TCB::to_string() const { From c6e9ebf0a93361bc2f0cf9067cdf6dd93017d23b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Thu, 13 Dec 2018 14:37:46 +0100 Subject: [PATCH 51/93] tcp: Don't use shared ptr when cleaning up --- api/net/tcp/connection.hpp | 2 +- api/net/tcp/listener.hpp | 2 +- api/net/tcp/tcp.hpp | 2 +- src/net/tcp/connection.cpp | 25 +++++++++++++------------ src/net/tcp/connection_states.cpp | 3 ++- src/net/tcp/listener.cpp | 6 +++--- 6 files changed, 21 insertions(+), 19 deletions(-) diff --git a/api/net/tcp/connection.hpp b/api/net/tcp/connection.hpp index 92dffed9d7..d40105dc1a 100644 --- a/api/net/tcp/connection.hpp +++ b/api/net/tcp/connection.hpp @@ -727,7 +727,7 @@ class Connection { * * @param Connection to be cleaned up */ - using CleanupCallback = delegate; + using CleanupCallback = delegate; CleanupCallback _on_cleanup_; inline Connection& _on_cleanup(CleanupCallback cb); diff --git a/api/net/tcp/listener.hpp b/api/net/tcp/listener.hpp index 6f0af4df74..9a2202b56b 100644 --- a/api/net/tcp/listener.hpp +++ b/api/net/tcp/listener.hpp @@ -103,7 +103,7 @@ class Listener { void segment_arrived(Packet_view&); - void remove(Connection_ptr); + void remove(const Connection*); void connected(Connection_ptr); diff --git a/api/net/tcp/tcp.hpp b/api/net/tcp/tcp.hpp index 15845d5a62..9e48eaa175 100644 --- a/api/net/tcp/tcp.hpp +++ b/api/net/tcp/tcp.hpp @@ -731,7 +731,7 @@ namespace net { * * @param[in] conn A ptr to a Connection */ - void 
close_connection(tcp::Connection_ptr conn) + void close_connection(const tcp::Connection* conn) { unbind(conn->local()); connections_.erase(conn->tuple()); diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index 12283b2ab9..796d004d47 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -1157,25 +1157,26 @@ void Connection::clean_up() { if(timewait_dack_timer.is_running()) timewait_dack_timer.stop(); - // necessary to keep the shared_ptr alive during the whole function after _on_cleanup_ is called - // avoids connection being destructed before function is done - Connection_ptr shared; - // clean up all other copies - // either in TCP::listeners_ (open) or Listener::syn_queue_ (half-open) - if(_on_cleanup_) { - shared = retrieve_shared(); - _on_cleanup_(shared); - } - + // make sure all our delegates are cleaned up (to avoid circular dependencies) on_connect_.reset(); on_disconnect_.reset(); on_close_.reset(); recv_wnd_getter.reset(); if(read_request) read_request->callback.reset(); - _on_cleanup_.reset(); - debug2(" Succesfully cleaned up %s\n", to_string().c_str()); + + debug2(" Call clean_up delg on %s\n", to_string().c_str()); + // clean up all other copies + // either in TCP::listeners_ (open) or Listener::syn_queue_ (half-open) + if(_on_cleanup_) + _on_cleanup_(this); + + + // if someone put a copy in this delg its their problem.. + //_on_cleanup_.reset(); + + debug2(" Succesfully cleaned up\n"); } std::string Connection::TCB::to_string() const { diff --git a/src/net/tcp/connection_states.cpp b/src/net/tcp/connection_states.cpp index 6d10299064..0ad754928d 100644 --- a/src/net/tcp/connection_states.cpp +++ b/src/net/tcp/connection_states.cpp @@ -931,7 +931,8 @@ State::Result Connection::SynReceived::handle(Connection& tcp, Packet_view& in) */ if(tcb.SND.UNA <= in.ack() and in.ack() <= tcb.SND.NXT) { - debug2(" SND.UNA =< SEG.ACK =< SND.NXT, continue in ESTABLISHED. 
\n"); + debug2(" %s SND.UNA =< SEG.ACK =< SND.NXT, continue in ESTABLISHED.\n", + tcp.to_string().c_str()); tcp.set_state(Connection::Established::instance()); diff --git a/src/net/tcp/listener.cpp b/src/net/tcp/listener.cpp index 144301473e..8fbbd0bbeb 100644 --- a/src/net/tcp/listener.cpp +++ b/src/net/tcp/listener.cpp @@ -115,12 +115,12 @@ void Listener::segment_arrived(Packet_view& packet) { TCPL_PRINT2(" No receipent\n"); } -void Listener::remove(Connection_ptr conn) { +void Listener::remove(const Connection* conn) { TCPL_PRINT2(" Try remove %s\n", conn->to_string().c_str()); auto it = syn_queue_.begin(); while(it != syn_queue_.end()) { - if((*it) == conn) + if(it->get() == conn) { syn_queue_.erase(it); debug(" %s removed.\n", conn->to_string().c_str()); @@ -132,7 +132,7 @@ void Listener::remove(Connection_ptr conn) { void Listener::connected(Connection_ptr conn) { debug(" %s connected\n", conn->to_string().c_str()); - remove(conn); + remove(conn.get()); Expects(conn->is_connected()); if (UNLIKELY(! 
host_.add_connection(conn))) return; From 6e99a2e96d4333ffcc5267f0f6c15617b9dad0b9 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Thu, 13 Dec 2018 15:22:50 +0100 Subject: [PATCH 52/93] test: update stress test to handle new arping API --- test/stress/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/stress/test.py b/test/stress/test.py index 4b35dc3144..92cae05a37 100755 --- a/test/stress/test.py +++ b/test/stress/test.py @@ -124,7 +124,7 @@ def httperf(burst_size = BURST_SIZE, burst_interval = BURST_INTERVAL): # Fire a single burst of ARP requests def ARP_burst(burst_size = BURST_SIZE, burst_interval = BURST_INTERVAL): # Note: Arping requires sudo, and we expect the bridge 'bridge43' to be present - command = ["sudo", "arping", "-q","-w", str(100), "-I", "bridge43", "-c", str(burst_size * 10), HOST] + command = ["sudo", "arping", "-q","-W", str(0.0001), "-I", "bridge43", "-c", str(burst_size * 10), HOST] print color.DATA(" ".join(command)) time.sleep(0.5) res = subprocess32.check_call(command, timeout=thread_timeout); From 41631eefd8f06d6d753ce793b64ebdec98ec47aa Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Fri, 14 Dec 2018 13:24:41 +0100 Subject: [PATCH 53/93] pmr: add on_avail event for notification of allocatable >= N --- api/util/alloc_pmr.hpp | 5 ++++ api/util/detail/alloc_pmr.hpp | 12 +++++++- test/util/unit/pmr_alloc_test.cpp | 49 +++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/api/util/alloc_pmr.hpp b/api/util/alloc_pmr.hpp index d8cec64efa..8cbe54c98f 100644 --- a/api/util/alloc_pmr.hpp +++ b/api/util/alloc_pmr.hpp @@ -91,12 +91,17 @@ namespace os::mem { /** Fires when the resource has been full and is not full anymore **/ void on_non_full(Event e){ non_full = e; } + /** Fires on transition from < N bytes to >= N bytes allocatable **/ + void on_avail(std::size_t N, Event e) { avail_thresh = N; avail = e; } + private: Pool_ptr pool_; std::size_t used = 0; std::size_t 
allocs = 0; std::size_t deallocs = 0; + std::size_t avail_thresh = 0; Event non_full{}; + Event avail{}; }; struct Default_pmr : public std::pmr::memory_resource { diff --git a/api/util/detail/alloc_pmr.hpp b/api/util/detail/alloc_pmr.hpp index f3524a2635..68fbb2227d 100644 --- a/api/util/detail/alloc_pmr.hpp +++ b/api/util/detail/alloc_pmr.hpp @@ -277,11 +277,21 @@ namespace os::mem { void Pmr_resource::do_deallocate(void* ptr, std::size_t s, std::size_t a) { Expects(s != 0); // POSIX malloc will allow size 0, but return nullptr. bool trigger_non_full = UNLIKELY(full() and non_full != nullptr); + bool trigger_avail_thresh = UNLIKELY(allocatable() < avail_thresh + and allocatable() + s >= avail_thresh + and avail != nullptr); pool_->deallocate(ptr,s,a); deallocs++; used -= s; - if (trigger_non_full) { + + if (UNLIKELY(trigger_avail_thresh)) { + Ensures(allocatable() >= avail_thresh); + Ensures(avail != nullptr); + avail(*this); + } + + if (UNLIKELY(trigger_non_full)) { Ensures(!full()); Ensures(non_full != nullptr); non_full(*this); diff --git a/test/util/unit/pmr_alloc_test.cpp b/test/util/unit/pmr_alloc_test.cpp index d9669145d5..51cdc1b0ae 100644 --- a/test/util/unit/pmr_alloc_test.cpp +++ b/test/util/unit/pmr_alloc_test.cpp @@ -294,6 +294,55 @@ CASE("pmr::on_non_full event") { } +CASE("pmr::on_avail event") { + using namespace util; + constexpr auto pool_cap = 400_KiB; + + // Using default resource capacity, which is pool_cap / allocator count + os::mem::Pmr_pool pool{pool_cap, pool_cap}; + auto res = pool.get_resource(); + bool event_fired = false; + + res->on_avail(200_KiB, [&](auto& r){ + EXPECT(&r == res.get()); + EXPECT(not r.full()); + EXPECT(r.allocatable() >= 200_KiB); + event_fired = true; + }); + + std::pmr::polymorphic_allocator alloc{res.get()}; + std::pmr::vector numbers{alloc}; + + numbers.push_back(0); + numbers.push_back(1); + EXPECT(not event_fired); + + auto reserved = 201_KiB; + numbers.reserve(reserved); + EXPECT(numbers.capacity() == 
reserved); + EXPECT(res->allocated() == reserved); + EXPECT(not event_fired); + + // In order to shrink, it needs to allocate new space for 2 chars then copy. + numbers.shrink_to_fit(); + EXPECT(res->allocated() < reserved); + EXPECT(event_fired); + event_fired = false; + EXPECT(not event_fired); + + for (int i = 2; i < 40_KiB; i++) { + numbers.push_back(i); + } + + EXPECT(not event_fired); + EXPECT(not res->full()); + + numbers.clear(); + numbers.shrink_to_fit(); + EXPECT(not event_fired); + +} + CASE("pmr::default resource cap") { // Not providing a resource cap will give each resource a proportion of max From d640be09239628430a84c883af3a2eb69e83a2aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Sat, 15 Dec 2018 12:01:04 +0100 Subject: [PATCH 54/93] tcp: Avoid bug when ts get lost by reparsing if lost, trigger window update with alloc --- api/net/tcp/connection.hpp | 7 ++++++ api/net/tcp/packet_view.hpp | 40 ++++++++++++++++++++++--------- src/net/tcp/connection.cpp | 30 +++++++++++++++++++---- src/net/tcp/connection_states.cpp | 3 ++- 4 files changed, 63 insertions(+), 17 deletions(-) diff --git a/api/net/tcp/connection.hpp b/api/net/tcp/connection.hpp index d40105dc1a..3541fb7e7e 100644 --- a/api/net/tcp/connection.hpp +++ b/api/net/tcp/connection.hpp @@ -873,6 +873,13 @@ class Connection { uint32_t calculate_rcv_wnd() const; + void send_window_update() { + update_rcv_wnd(); + send_ack(); + } + + void trigger_window_update(os::mem::Pmr_resource& res); + /** * @brief Receive data from an incoming packet containing data. 
* diff --git a/api/net/tcp/packet_view.hpp b/api/net/tcp/packet_view.hpp index 2f4a707c3c..1fadc3e43a 100644 --- a/api/net/tcp/packet_view.hpp +++ b/api/net/tcp/packet_view.hpp @@ -163,7 +163,10 @@ class Packet_v { const Option::opt_ts* ts_option() const noexcept { return ts_opt; } - inline const Option::opt_ts* parse_ts_option() noexcept; + inline const Option::opt_ts* parse_ts_option() const noexcept; + + void set_ts_option(const Option::opt_ts* opt) + { this->ts_opt = opt; } // Data // @@ -238,7 +241,7 @@ class Packet_v { private: - Option::opt_ts* ts_opt = nullptr; + const Option::opt_ts* ts_opt = nullptr; virtual void set_ip_src(const net::Addr& addr) noexcept = 0; virtual void set_ip_dst(const net::Addr& addr) noexcept = 0; @@ -312,19 +315,34 @@ inline void Packet_v::add_tcp_option_aligned(Args&&... args) { set_length(); // update } -// assumes the packet contains no other options. template -inline const Option::opt_ts* Packet_v::parse_ts_option() noexcept +inline const Option::opt_ts* Packet_v::parse_ts_option() const noexcept { auto* opt = this->tcp_options(); - // TODO: improve by iterate option instead of byte (see Connection::parse_options) - while(((Option*)opt)->kind == Option::NOP and opt < (uint8_t*)this->tcp_data()) - opt++; - - if(((Option*)opt)->kind == Option::TS) - this->ts_opt = (Option::opt_ts*)opt; + while(opt < (uint8_t*)this->tcp_data()) + { + auto* option = (Option*)opt; + switch(option->kind) + { + case Option::NOP: { + opt++; + break; + } + + case Option::TS: { + return reinterpret_cast(option); + } + + case Option::END: { + return nullptr; + } + + default: + opt += option->length; + } + } - return this->ts_opt; + return nullptr; } template diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index 796d004d47..3583ea902a 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -66,6 +66,7 @@ void Connection::_on_read(size_t recv_bufsz, ReadCallback cb) Expects(bufalloc != nullptr); read_request.reset( 
new Read_request(this->cb.RCV.NXT, host_.min_bufsize(), host_.max_bufsize(), cb, bufalloc.get())); + bufalloc->on_non_full({this, &Connection::trigger_window_update}); } // read request is already set, only reset if new size. else @@ -417,6 +418,10 @@ bool Connection::handle_ack(const Packet_view& in) if(cb.SND.TS_OK) { const auto* ts = in.ts_option(); + // reparse to avoid case when stored ts suddenly get lost + if(ts == nullptr) + ts = in.parse_ts_option(); + if(ts != nullptr) // TODO: not sure the packet is valid if TS missing last_acked_ts_ = ts->ecr; } @@ -578,7 +583,7 @@ void Connection::on_dup_ack(const Packet_view& in) // 3 dup acks else if(dup_acks_ == 3) { - debug(" Dup ACK == 3 - %u\n", cb.SND.UNA); + debug(" Dup ACK == 3 - UNA=%u recover=%u\n", cb.SND.UNA, cb.recover); if(cb.SND.UNA - 1 > cb.recover) goto fast_rtx; @@ -587,9 +592,14 @@ void Connection::on_dup_ack(const Packet_view& in) if(cb.SND.TS_OK) { const auto* ts = in.ts_option(); - if(ts != nullptr and last_acked_ts_ == ts->ecr) + // reparse to avoid case when stored ts suddenly get lost + if(ts == nullptr) + ts = in.parse_ts_option(); + + if(ts != nullptr) { - goto fast_rtx; + if(last_acked_ts_ == ts->ecr) + goto fast_rtx; } } // 4.1. ACK Heuristic @@ -597,13 +607,13 @@ void Connection::on_dup_ack(const Packet_view& in) { goto fast_rtx; } - return; fast_rtx: { cb.recover = cb.SND.NXT; - debug(" Enter Recovery - Flight Size: %u\n", flight_size()); + debug(" Enter Recovery %u - Flight Size: %u\n", + cb.recover, flight_size()); fast_retransmit(); } } @@ -653,6 +663,13 @@ void Connection::rtx_ack(const seq_t ack) { // x-rtx_q.size(), rtx_q.size()); } +void Connection::trigger_window_update(os::mem::Pmr_resource& res) +{ + //printf("window freed up? 
%zu\n", res.allocatable()); + if(res.allocatable() >= (host_.max_bufsize() * Read_request::buffer_limit)) + send_window_update(); +} + uint32_t Connection::calculate_rcv_wnd() const { // PRECISE REPORTING @@ -904,6 +921,9 @@ void Connection::take_rtt_measure(const Packet_view& packet) if(cb.SND.TS_OK) { const auto* ts = packet.ts_option(); + // reparse to avoid case when stored ts suddenly get lost + if(ts == nullptr) + ts = packet.parse_ts_option(); if(ts) { rttm.rtt_measurement(RTTM::milliseconds{host_.get_ts_value() - ntohl(ts->ecr)}); diff --git a/src/net/tcp/connection_states.cpp b/src/net/tcp/connection_states.cpp index 0ad754928d..0fd074e5c1 100644 --- a/src/net/tcp/connection_states.cpp +++ b/src/net/tcp/connection_states.cpp @@ -101,9 +101,10 @@ bool Connection::State::check_seq(Connection& tcp, Packet_view& in) if(tcb.SND.TS_OK and in.tcp_header_length() == HEADER_WITH_TS) { const auto* ts = in.parse_ts_option(); + in.set_ts_option(ts); // PAWS - if(UNLIKELY(ts != nullptr and (ntohl(ts->val) < tcb.TS_recent and !in.isset(RST)))) + if(UNLIKELY(ts != nullptr and (ts->get_val() < tcb.TS_recent and !in.isset(RST)))) { /* If the connection has been idle more than 24 days, From ebc56aaafd937d56f68c34921db7b2f78bfd1a21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Sat, 15 Dec 2018 13:11:57 +0100 Subject: [PATCH 55/93] tcp: Change trigger send window event to avail --- src/net/tcp/connection.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index 3583ea902a..c448a36823 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -66,7 +66,8 @@ void Connection::_on_read(size_t recv_bufsz, ReadCallback cb) Expects(bufalloc != nullptr); read_request.reset( new Read_request(this->cb.RCV.NXT, host_.min_bufsize(), host_.max_bufsize(), cb, bufalloc.get())); - bufalloc->on_non_full({this, &Connection::trigger_window_update}); + const size_t avail_thres = 
host_.max_bufsize() * Read_request::buffer_limit; + bufalloc->on_avail(avail_thres, {this, &Connection::trigger_window_update}); } // read request is already set, only reset if new size. else From 6fa961734bf7d3368fc364f9e607e96ae44a8272 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Mon, 17 Dec 2018 11:08:34 +0100 Subject: [PATCH 56/93] net: remove wrong endif from new checksum.cpp --- src/net/checksum.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/net/checksum.cpp b/src/net/checksum.cpp index ab7c992d9c..065152fea5 100644 --- a/src/net/checksum.cpp +++ b/src/net/checksum.cpp @@ -20,7 +20,6 @@ #if defined(__AVX2__) #include -#endif #elif defined(__SSSE3__) #include #endif From 04deec3aff7ec83f26abf217db7d7c9cb2797505 Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Mon, 17 Dec 2018 17:56:32 +0100 Subject: [PATCH 57/93] microlb: removed code that closed connection once buffer limit of 8k was reached --- lib/microLB/micro_lb/balancer.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index 770c5653ad..2150dd89df 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -2,7 +2,6 @@ #include #define READQ_PER_CLIENT 4096 -#define MAX_READQ_PER_NODE 8192 #define READQ_FOR_NODES 8192 #define MAX_OUTGOING_ATTEMPTS 100 // checking if nodes are dead or not @@ -155,13 +154,8 @@ namespace microLB [this] (auto buf) { // prevent buffer bloat attack this->total += buf->size(); - if (this->total > MAX_READQ_PER_NODE) { - this->conn->close(); - } - else { - LBOUT("*** Queued %lu bytes\n", buf->size()); - readq.push_back(buf); - } + LBOUT("*** Queued %lu bytes\n", buf->size()); + readq.push_back(buf); }); } From 39d538e2a984a3c555783788c263952e269f3601 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Tue, 18 Dec 2018 13:18:15 +0100 Subject: [PATCH 58/93] tcp: Avoid wrap in seq num check, reparse TS if 
missing --- src/net/tcp/connection.cpp | 8 +++++--- src/net/tcp/connection_states.cpp | 25 ++++++------------------- 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index c448a36823..73c75ca1e5 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -666,9 +666,11 @@ void Connection::rtx_ack(const seq_t ack) { void Connection::trigger_window_update(os::mem::Pmr_resource& res) { - //printf("window freed up? %zu\n", res.allocatable()); - if(res.allocatable() >= (host_.max_bufsize() * Read_request::buffer_limit)) + const auto reserve = (host_.max_bufsize() * Read_request::buffer_limit); + if(res.allocatable() >= reserve and cb.RCV.WND == 0) { + //printf("allocatable=%zu cur_win=%u\n", res.allocatable(), cb.RCV.WND); send_window_update(); + } } uint32_t Connection::calculate_rcv_wnd() const @@ -799,7 +801,7 @@ void Connection::recv_data(const Packet_view& in) } } // Packet out of order - else if((in.seq() - cb.RCV.NXT) < cb.RCV.WND) + else if(( (in.seq() + in.tcp_data_length()) - cb.RCV.NXT) < cb.RCV.WND) { // only accept the data if we have a read request if(read_request != nullptr) diff --git a/src/net/tcp/connection_states.cpp b/src/net/tcp/connection_states.cpp index 0fd074e5c1..acce2b2ba9 100644 --- a/src/net/tcp/connection_states.cpp +++ b/src/net/tcp/connection_states.cpp @@ -94,7 +94,6 @@ using namespace std; bool Connection::State::check_seq(Connection& tcp, Packet_view& in) { auto& tcb = tcp.tcb(); - uint32_t packet_end = static_cast(in.seq() + in.tcp_data_length()-1); // RFC 7323 static constexpr uint8_t HEADER_WITH_TS{sizeof(Header) + 12}; @@ -127,24 +126,9 @@ bool Connection::State::check_seq(Connection& tcp, Packet_view& in) goto unacceptable; // #2 - Packet is ahead of what we expect to receive, but inside our window - if( tcb.RCV.NXT <= in.seq() and in.seq() < tcb.RCV.NXT + tcb.RCV.WND ) { + if( (in.seq() - tcb.RCV.NXT) < tcb.RCV.WND ) { goto acceptable; } - // 
#3 (INVALID) - Packet is outside the right edge of the recv window - else if( packet_end > tcb.RCV.NXT+tcb.RCV.WND ) { - //printf("Outside right: %s NXT=%u WND=%u\n", in.to_string().c_str(), tcb.RCV.NXT, tcb.RCV.WND); - goto unacceptable; - } - // #4 - Packet with payload is what we expect or bigger, but inside our window - else if( tcb.RCV.NXT <= packet_end - and packet_end < tcb.RCV.NXT+tcb.RCV.WND ) { - goto acceptable; - } - else - { - //printf("Probably outside on left side %s end=%u NXT=%u WND=%u\n", - // in.to_string().c_str(), packet_end, tcb.RCV.NXT, tcb.RCV.WND); - } /* If an incoming segment is not acceptable, an acknowledgment should be sent in reply (unless the RST bit is set, if so drop @@ -167,10 +151,13 @@ bool Connection::State::check_seq(Connection& tcp, Packet_view& in) acceptable: const auto* ts = in.ts_option(); + if(tcb.SND.TS_OK) + ts = in.parse_ts_option(); + if(ts != nullptr and - (ntohl(ts->val) >= tcb.TS_recent and in.seq() <= tcp.last_ack_sent_)) + (ts->get_val() >= tcb.TS_recent and in.seq() <= tcp.last_ack_sent_)) { - tcb.TS_recent = ntohl(ts->val); + tcb.TS_recent = ts->get_val(); } debug2(" Acceptable SEQ: %u \n", in.seq()); // is acceptable. 
From 4debeac834f7a246b9ccbf5dcfdb7401d944c1bf Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Tue, 18 Dec 2018 17:10:19 +0100 Subject: [PATCH 59/93] net: added function to check if a buffer has unhandled data to check before deleting --- api/net/tcp/read_buffer.hpp | 8 ++++++++ src/net/tcp/read_request.cpp | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/api/net/tcp/read_buffer.hpp b/api/net/tcp/read_buffer.hpp index 7b657bee76..a7539ebd57 100644 --- a/api/net/tcp/read_buffer.hpp +++ b/api/net/tcp/read_buffer.hpp @@ -84,6 +84,14 @@ class Read_buffer { buffer_t buffer() { return buf; } + /** + * @brief Check if internal buffer has unhandled data + * + * @return True if the internal buffer is unique with data and doesnt contain hole + */ + bool has_unhandled_data() + { return (buf.unique() && (size() > 0) && (missing() == 0)); } + /** * @brief Exposes the internal buffer (read only) * diff --git a/src/net/tcp/read_request.cpp b/src/net/tcp/read_request.cpp index 1fba23ba71..98b72b50b4 100644 --- a/src/net/tcp/read_request.cpp +++ b/src/net/tcp/read_request.cpp @@ -202,8 +202,9 @@ namespace tcp { // if noone is using the buffer right now, (stupid yes) // AND it contains data without any holes, // return it to the user - if(buf->buffer().unique() and buf->size() > 0 and buf->missing() == 0) + if (buf->has_unhandled_data()) { + callback(buf->buffer()); } // reset the first buffer From d6b28aff941fde5e1555d5d7e824a03ecc57bc70 Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Tue, 18 Dec 2018 17:14:04 +0100 Subject: [PATCH 60/93] microlb: wrong ordering between flush and session create caused out of order data --- lib/microLB/micro_lb/balancer.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index 2150dd89df..317e55809a 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -202,13 +202,14 @@ namespace 
microLB assert(outgoing->is_connected()); LBOUT("Assigning client to node %d (%s)\n", algo_iterator, outgoing->to_string().c_str()); - auto& session = this->create_session( - std::move(conn), std::move(outgoing)); - // flush readq to session.outgoing + // flush readq to outgoing before creating session for (auto buffer : readq) { LBOUT("*** Flushing %lu bytes\n", buffer->size()); - session.outgoing->write(buffer); + outgoing->write(buffer); } + auto& session = this->create_session( + std::move(conn), std::move(outgoing)); + return nullptr; } } From 079e8e42f8a1923f0218490bc1215dbc6fbcd0d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Thu, 20 Dec 2018 10:59:26 +0100 Subject: [PATCH 61/93] tcp: added some debugging prints, remove later --- src/net/tcp/connection.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index 73c75ca1e5..e531818910 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -405,6 +405,8 @@ bool Connection::handle_ack(const Packet_view& in) if(is_win_update(in, true_win)) { + if(cb.SND.WND < SMSS()*2) + printf("Win update: %u => %u\n", cb.SND.WND, true_win); cb.SND.WND = true_win; cb.SND.WL1 = in.seq(); cb.SND.WL2 = in.ack(); @@ -584,7 +586,7 @@ void Connection::on_dup_ack(const Packet_view& in) // 3 dup acks else if(dup_acks_ == 3) { - debug(" Dup ACK == 3 - UNA=%u recover=%u\n", cb.SND.UNA, cb.recover); + printf(" Dup ACK == 3 - UNA=%u recover=%u\n", cb.SND.UNA, cb.recover); if(cb.SND.UNA - 1 > cb.recover) goto fast_rtx; @@ -968,8 +970,8 @@ void Connection::retransmit() { // TODO: Finish to send window zero probe, but only on rtx timeout - debug2(" With data (wq.sz=%u) buf.unacked=%u\n", - writeq.size(), buf->size(), buf->size() - writeq.acked()); + printf(" With data (wq.sz=%zu) buf.size=%zu buf.unacked=%zu SND.WND=%u CWND=%u\n", + writeq.size(), buf->size(), buf->size() - writeq.acked(), cb.SND.WND, 
cb.cwnd); fill_packet(*packet, buf->data() + writeq.acked(), buf->size() - writeq.acked()); packet->set_flag(PSH); } @@ -1041,8 +1043,8 @@ void Connection::rtx_clear() { begins (i.e., after the three-way handshake completes). */ void Connection::rtx_timeout() { - debug(" Timed out (RTO %lld ms). FS: %u\n", - rttm.rto_ms().count(), flight_size()); + printf(" Timed out (RTO %lld ms). FS: %u usable=%u\n", + rttm.rto_ms().count(), flight_size(), usable_window()); signal_rtx_timeout(); // experimental @@ -1383,12 +1385,12 @@ void Connection::reduce_ssthresh() { fs = (fs >= two_seg) ? fs - two_seg : 0; cb.ssthresh = std::max( (fs / 2), two_seg ); - debug2(" Slow start threshold reduced: %u\n", + printf(" Slow start threshold reduced: %u\n", cb.ssthresh); } void Connection::fast_retransmit() { - debug(" Fast retransmit initiated.\n"); + printf(" Fast retransmit initiated.\n"); // reduce sshtresh reduce_ssthresh(); // retransmit segment starting SND.UNA @@ -1403,5 +1405,5 @@ void Connection::finish_fast_recovery() { fast_recovery_ = false; //cb.cwnd = std::min(cb.ssthresh, std::max(flight_size(), (uint32_t)SMSS()) + SMSS()); cb.cwnd = cb.ssthresh; - debug(" Finished Fast Recovery - Cwnd: %u\n", cb.cwnd); + printf(" Finished Fast Recovery - Cwnd: %u\n", cb.cwnd); } From 6f7c159756377408a7bb61dd796d1527074becc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Fri, 4 Jan 2019 13:05:59 +0100 Subject: [PATCH 62/93] tcp: comment out some debugging --- src/net/tcp/connection.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index e531818910..133006a049 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -405,8 +405,8 @@ bool Connection::handle_ack(const Packet_view& in) if(is_win_update(in, true_win)) { - if(cb.SND.WND < SMSS()*2) - printf("Win update: %u => %u\n", cb.SND.WND, true_win); + //if(cb.SND.WND < SMSS()*2) + // printf("Win 
update: %u => %u\n", cb.SND.WND, true_win); cb.SND.WND = true_win; cb.SND.WL1 = in.seq(); cb.SND.WL2 = in.ack(); @@ -586,7 +586,7 @@ void Connection::on_dup_ack(const Packet_view& in) // 3 dup acks else if(dup_acks_ == 3) { - printf(" Dup ACK == 3 - UNA=%u recover=%u\n", cb.SND.UNA, cb.recover); + //printf(" Dup ACK == 3 - UNA=%u recover=%u\n", cb.SND.UNA, cb.recover); if(cb.SND.UNA - 1 > cb.recover) goto fast_rtx; @@ -970,8 +970,8 @@ void Connection::retransmit() { // TODO: Finish to send window zero probe, but only on rtx timeout - printf(" With data (wq.sz=%zu) buf.size=%zu buf.unacked=%zu SND.WND=%u CWND=%u\n", - writeq.size(), buf->size(), buf->size() - writeq.acked(), cb.SND.WND, cb.cwnd); + //printf(" With data (wq.sz=%zu) buf.size=%zu buf.unacked=%zu SND.WND=%u CWND=%u\n", + // writeq.size(), buf->size(), buf->size() - writeq.acked(), cb.SND.WND, cb.cwnd); fill_packet(*packet, buf->data() + writeq.acked(), buf->size() - writeq.acked()); packet->set_flag(PSH); } @@ -1043,8 +1043,8 @@ void Connection::rtx_clear() { begins (i.e., after the three-way handshake completes). */ void Connection::rtx_timeout() { - printf(" Timed out (RTO %lld ms). FS: %u usable=%u\n", - rttm.rto_ms().count(), flight_size(), usable_window()); + //printf(" Timed out (RTO %lld ms). FS: %u usable=%u\n", + // rttm.rto_ms().count(), flight_size(), usable_window()); signal_rtx_timeout(); // experimental @@ -1385,12 +1385,12 @@ void Connection::reduce_ssthresh() { fs = (fs >= two_seg) ? 
fs - two_seg : 0; cb.ssthresh = std::max( (fs / 2), two_seg ); - printf(" Slow start threshold reduced: %u\n", - cb.ssthresh); + //printf(" Slow start threshold reduced: %u\n", + // cb.ssthresh); } void Connection::fast_retransmit() { - printf(" Fast retransmit initiated.\n"); + //printf(" Fast retransmit initiated.\n"); // reduce sshtresh reduce_ssthresh(); // retransmit segment starting SND.UNA @@ -1405,5 +1405,5 @@ void Connection::finish_fast_recovery() { fast_recovery_ = false; //cb.cwnd = std::min(cb.ssthresh, std::max(flight_size(), (uint32_t)SMSS()) + SMSS()); cb.cwnd = cb.ssthresh; - printf(" Finished Fast Recovery - Cwnd: %u\n", cb.cwnd); + //printf(" Finished Fast Recovery - Cwnd: %u\n", cb.cwnd); } From ae0b58e5e12d0d75751485a10db6937d50e6b18d Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Sun, 6 Jan 2019 21:48:04 +0100 Subject: [PATCH 63/93] Correct connection tracking cherry picked from ipv6 --- src/net/conntrack.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/net/conntrack.cpp b/src/net/conntrack.cpp index 3621f3ef1b..42cf65f343 100644 --- a/src/net/conntrack.cpp +++ b/src/net/conntrack.cpp @@ -352,23 +352,22 @@ int Conntrack::deserialize_from(void* addr) const auto size = *reinterpret_cast(buffer); buffer += sizeof(size_t); - + size_t dupes = 0; for(auto i = size; i > 0; i--) { // create the entry auto entry = std::make_shared(); buffer += entry->deserialize_from(buffer); - entries.emplace(std::piecewise_construct, - std::forward_as_tuple(entry->first, entry->proto), - std::forward_as_tuple(entry)); - - entries.emplace(std::piecewise_construct, - std::forward_as_tuple(entry->second, entry->proto), - std::forward_as_tuple(entry)); + bool insert = false; + insert = entries.insert_or_assign({entry->first, entry->proto}, entry).second; + if(not insert) + dupes++; + insert = entries.insert_or_assign({entry->second, entry->proto}, entry).second; + if(not insert) + dupes++; } - - 
Ensures(entries.size() - prev_size == size * 2); + Ensures(entries.size() - (prev_size-dupes) == size * 2); return buffer - reinterpret_cast(addr); } From 74a5a6eb13c8648e5fba88fb48a22939715b8b2a Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Tue, 8 Jan 2019 14:38:30 +0100 Subject: [PATCH 64/93] tcp: add on_data event with sync read. WIP. --- api/net/tcp/connection.hpp | 37 ++++++++++++++++++++++ api/net/tcp/connection.inc | 17 +++++++++- api/net/tcp/read_request.hpp | 13 ++++++-- lib/LiveUpdate/serialize_tcp.cpp | 2 +- src/net/tcp/connection.cpp | 54 +++++++++++++++++++++++++++----- src/net/tcp/read_request.cpp | 50 +++++++++++++++++++++++++---- 6 files changed, 155 insertions(+), 18 deletions(-) diff --git a/api/net/tcp/connection.hpp b/api/net/tcp/connection.hpp index 3541fb7e7e..f49dfd4062 100644 --- a/api/net/tcp/connection.hpp +++ b/api/net/tcp/connection.hpp @@ -93,6 +93,35 @@ class Connection { */ inline Connection& on_read(size_t recv_bufsz, ReadCallback callback); + + using DataCallback = delegate; + /** + * @brief Event when incoming data is received by the connection. + * The callback is called when either 1) PSH is seen, or 2) the buffer is full + * + * The user is expected to fetch data by calling read_next, otherwise the + * event will be triggered again. Unread data will be buffered as long as + * there is capacity in the read queue. + * If an on_read callback is also registered, this event has no effect. + * + * @param[in] callback The callback + * + * @return This connection + */ + inline Connection& on_data(DataCallback callback); + + /** + * @brief Read the next fully acked chunk of received data if any. + * + * @return Pointer to buffer if any, otherwise nullptr. + */ + inline buffer_t read_next(); + + /** + * @return The size of the next fully acked chunk of received data. + */ + inline size_t next_size(); + /** Called with the connection itself and the reason wrapped in a Disconnect struct. 
*/ using DisconnectCallback = delegate; /** @@ -714,6 +743,14 @@ class Connection { */ void _on_read(size_t recv_bufsz, ReadCallback cb); + /** + * @brief Set the on_data handler + * + * @param[in] cb The callback + */ + void _on_data(DataCallback cb); + + // Retrieve the associated shared_ptr for a connection, if it exists // Throws out_of_range if it doesn't Connection_ptr retrieve_shared(); diff --git a/api/net/tcp/connection.inc b/api/net/tcp/connection.inc index 166d3ef0e2..9cc298136a 100644 --- a/api/net/tcp/connection.inc +++ b/api/net/tcp/connection.inc @@ -14,6 +14,11 @@ inline Connection& Connection::on_read(size_t recv_bufsz, ReadCallback cb) return *this; } +inline Connection& Connection::on_data(DataCallback cb) { + _on_data(cb); + return *this; +} + inline Connection& Connection::on_disconnect(DisconnectCallback cb) { on_disconnect_ = cb; return *this; @@ -31,7 +36,7 @@ inline Connection& Connection::on_close(CloseCallback cb) { inline Connection& Connection::set_on_read_callback(ReadCallback cb) { Expects(read_request != nullptr && "on_read hasn't been called before."); - read_request->callback = cb; + read_request->on_read_callback = cb; return *this; } @@ -40,6 +45,16 @@ inline Connection& Connection::_on_cleanup(CleanupCallback cb) { return *this; } +inline buffer_t Connection::read_next() { + static buffer_t empty_buf{}; + if (read_request == nullptr) { + return empty_buf; + } + return read_request->read_next(); +} + + + inline void Connection::write(const void* buf, size_t n) { this->write(tcp::construct_buffer((uint8_t*) buf, (uint8_t*) buf + n)); } diff --git a/api/net/tcp/read_request.hpp b/api/net/tcp/read_request.hpp index efe9e93e02..203dc329e3 100644 --- a/api/net/tcp/read_request.hpp +++ b/api/net/tcp/read_request.hpp @@ -30,12 +30,15 @@ class Read_request { public: using Buffer_ptr = std::unique_ptr; using Buffer_queue = std::deque; + using Ready_queue = std::deque; using ReadCallback = delegate; + using DataCallback = delegate; using 
Alloc = os::mem::buffer::allocator_type; static constexpr size_t buffer_limit = 2; - ReadCallback callback; + ReadCallback on_read_callback = nullptr; + DataCallback on_data_callback = nullptr; - Read_request(seq_t start, size_t min, size_t max, ReadCallback cb, Alloc&& alloc = Alloc()); + Read_request(seq_t start, size_t min, size_t max, Alloc&& alloc = Alloc()); size_t insert(seq_t seq, const uint8_t* data, size_t n, bool psh = false); @@ -47,6 +50,9 @@ class Read_request { void reset(const seq_t seq); + size_t next_size(); + buffer_t read_next(); + const Read_buffer& front() const { return *buffers.front(); } @@ -57,7 +63,10 @@ class Read_request { { return buffers; } private: + void signal_data(); + Buffer_queue buffers; + Ready_queue complete_buffers; Alloc alloc; Read_buffer* get_buffer(const seq_t seq); diff --git a/lib/LiveUpdate/serialize_tcp.cpp b/lib/LiveUpdate/serialize_tcp.cpp index ce3e1ace5e..47ab82692d 100644 --- a/lib/LiveUpdate/serialize_tcp.cpp +++ b/lib/LiveUpdate/serialize_tcp.cpp @@ -182,7 +182,7 @@ void Connection::deserialize_from(void* addr) auto* readq = (read_buffer*) &area->vla[writeq_len]; if (readq->capacity) { - read_request = std::make_unique(readq->seq, readq->capacity, host_.max_bufsize(), nullptr, bufalloc.get()); + read_request = std::make_unique(readq->seq, readq->capacity, host_.max_bufsize(), bufalloc.get()); read_request->front().deserialize_from(readq); } diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index 73c75ca1e5..0bc326336e 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -65,7 +65,8 @@ void Connection::_on_read(size_t recv_bufsz, ReadCallback cb) { Expects(bufalloc != nullptr); read_request.reset( - new Read_request(this->cb.RCV.NXT, host_.min_bufsize(), host_.max_bufsize(), cb, bufalloc.get())); + new Read_request(this->cb.RCV.NXT, host_.min_bufsize(), host_.max_bufsize(), bufalloc.get())); + read_request->on_read_callback = cb; const size_t avail_thres = 
host_.max_bufsize() * Read_request::buffer_limit; bufalloc->on_avail(avail_thres, {this, &Connection::trigger_window_update}); } @@ -73,7 +74,7 @@ void Connection::_on_read(size_t recv_bufsz, ReadCallback cb) else { //printf("on_read already set\n"); - read_request->callback = cb; + read_request->on_read_callback = cb; // this will flush the current data to the user (if any) read_request->reset(this->cb.RCV.NXT); @@ -84,6 +85,32 @@ void Connection::_on_read(size_t recv_bufsz, ReadCallback cb) } } +void Connection::_on_data(DataCallback cb) { + if(read_request == nullptr) + { + Expects(bufalloc != nullptr); + read_request.reset( + new Read_request(this->cb.RCV.NXT, host_.min_bufsize(), host_.max_bufsize(), bufalloc.get())); + read_request->on_data_callback = cb; + const size_t avail_thres = host_.max_bufsize() * Read_request::buffer_limit; + bufalloc->on_avail(avail_thres, {this, &Connection::trigger_window_update}); + } + // read request is already set, only reset if new size. + else + { + //printf("on_read already set\n"); + read_request->on_data_callback = cb; + + read_request->reset(this->cb.RCV.NXT); + + // due to throwing away buffers (and all data) we also + // need to clear the sack list if anything is stored here. 
+ if(sack_list) + sack_list->clear(); + } +} + + Connection_ptr Connection::retrieve_shared() { return host_.retrieve_shared(this); } @@ -120,8 +147,10 @@ void Connection::reset_callbacks() writeq.on_write(nullptr); on_close_.reset(); recv_wnd_getter.reset(); - if(read_request) - read_request->callback.reset(); + if(read_request) { + read_request->on_read_callback.reset(); + read_request->on_data_callback.reset(); + } } uint16_t Connection::MSDS() const noexcept { @@ -277,7 +306,7 @@ void Connection::close() { void Connection::receive_disconnect() { Expects(read_request and read_request->size()); - if(read_request->callback) { + if(read_request->on_read_callback) { // TODO: consider adding back when SACK is complete //auto& buf = read_request->buffer; //if (buf.size() > 0 && buf.missing() == 0) @@ -1150,13 +1179,20 @@ void Connection::start_dack() void Connection::signal_connect(const bool success) { - // if on read was set before we got a seq number, + // if read request was set before we got a seq number, // update the starting sequence number for the read buffer if(read_request and success) read_request->set_start(cb.RCV.NXT); if(on_connect_) (success) ? on_connect_(retrieve_shared()) : on_connect_(nullptr); + + // If no data event was registered we still want to start buffering here, + // in case the user is not yet ready to subscribe to data. 
+ if (read_request == nullptr and success) { + read_request.reset( + new Read_request(this->cb.RCV.NXT, host_.min_bufsize(), host_.max_bufsize(), bufalloc.get())); + } } void Connection::signal_close() @@ -1185,8 +1221,10 @@ void Connection::clean_up() { on_disconnect_.reset(); on_close_.reset(); recv_wnd_getter.reset(); - if(read_request) - read_request->callback.reset(); + if(read_request) { + read_request->on_read_callback.reset(); + read_request->on_data_callback.reset(); + } debug2(" Call clean_up delg on %s\n", to_string().c_str()); diff --git a/src/net/tcp/read_request.cpp b/src/net/tcp/read_request.cpp index 98b72b50b4..7dee72d4ef 100644 --- a/src/net/tcp/read_request.cpp +++ b/src/net/tcp/read_request.cpp @@ -20,9 +20,8 @@ namespace net { namespace tcp { - Read_request::Read_request(seq_t start, size_t min, size_t max, - ReadCallback cb, Alloc&& alloc) - : callback{cb}, alloc{alloc} + Read_request::Read_request(seq_t start, size_t min, size_t max, Alloc&& alloc) + : alloc{alloc} { buffers.push_back(std::make_unique(start, min, max, alloc)); } @@ -57,7 +56,9 @@ namespace tcp { { const auto rem = buf->capacity() - buf->size(); const auto end_seq = buf->end_seq(); // store end_seq if reseted in callback - if (callback) callback(buf->buffer()); + + // Ready buffer for userspace consumption + complete_buffers.push_back(buf->buffer()); // this is the only one, so we can reuse it if(buffers.size() == 1) @@ -99,6 +100,8 @@ namespace tcp { } // < while(n) + signal_data(); + Ensures(not buffers.empty()); return recv; } @@ -185,6 +188,39 @@ namespace tcp { } } + void Read_request::signal_data() { + + if (not complete_buffers.empty()) { + if (on_read_callback != nullptr) { + for (auto buf : complete_buffers) { + on_read_callback(buf); + } + } else if (on_data_callback != nullptr){ + on_data_callback(); + if (not complete_buffers.empty()) { + // FIXME: Make sure this event gets re-triggered + } + } + } + } + + size_t Read_request::next_size() { + if (not 
complete_buffers.empty()) { + return complete_buffers.front()->size(); + } + return 0; + } + + buffer_t Read_request::read_next() { + static const buffer_t empty_buf {}; + if (not complete_buffers.empty()) { + auto buf = complete_buffers.front(); + complete_buffers.pop_front(); + return buf; + } + return empty_buf; + } + void Read_request::reset(const seq_t seq) { Expects(not buffers.empty()); @@ -204,9 +240,11 @@ namespace tcp { // return it to the user if (buf->has_unhandled_data()) { - - callback(buf->buffer()); + complete_buffers.push_back(buf->buffer()); } + + signal_data(); + // reset the first buffer buf->reset(seq); // throw the others away From bc7ad770a44ce13b574ba7711c8cb38da7fc4f33 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Tue, 8 Jan 2019 17:00:00 +0100 Subject: [PATCH 65/93] net::Stream: add on_data & friends. WIP. --- api/net/botan/tls_server.hpp | 12 ++++++++++++ api/net/openssl/tls_stream.hpp | 13 +++++++++++++ api/net/stream.hpp | 21 ++++++++++++++++++++- api/net/tcp/connection.inc | 9 +++++++-- api/net/tcp/stream.hpp | 25 +++++++++++++++++++++++++ 5 files changed, 77 insertions(+), 3 deletions(-) diff --git a/api/net/botan/tls_server.hpp b/api/net/botan/tls_server.hpp index b187243069..7c7aa283f2 100644 --- a/api/net/botan/tls_server.hpp +++ b/api/net/botan/tls_server.hpp @@ -55,6 +55,18 @@ class Server : public Botan::TLS::Callbacks, public net::Stream m_transport->on_read(bs, {this, &Server::tls_read}); this->m_on_read = cb; } + void on_data(DataCallback cb) override { + // FIXME + throw std::runtime_error("on_data not implemented on botan::server"); + } + size_t next_size() override { + // FIXME + throw std::runtime_error("next_size not implemented on botan::server"); + } + buffer_t read_next() override { + // FIXME + throw std::runtime_error("read_next not implemented on botan::server"); + } void on_write(WriteCallback cb) override { this->m_on_write = cb; } diff --git a/api/net/openssl/tls_stream.hpp 
b/api/net/openssl/tls_stream.hpp index 6ff53ada1a..81225eb254 100644 --- a/api/net/openssl/tls_stream.hpp +++ b/api/net/openssl/tls_stream.hpp @@ -43,6 +43,17 @@ namespace openssl void on_read(size_t, ReadCallback cb) override { m_on_read = std::move(cb); } + void on_data(DataCallback cb) override { + m_on_data = std::move(cb); + } + size_t next_size() override { + // FIXME: implement buffering for read_next + return 0; + } + buffer_t read_next() override { + // FIXME: implement buffering for read_next + return{}; + } void on_close(CloseCallback cb) override { m_on_close = std::move(cb); } @@ -98,6 +109,7 @@ namespace openssl bool m_deferred_close = false; ConnectCallback m_on_connect = nullptr; ReadCallback m_on_read = nullptr; + DataCallback m_on_data = nullptr; WriteCallback m_on_write = nullptr; CloseCallback m_on_close = nullptr; }; @@ -120,6 +132,7 @@ namespace openssl SSL_set_bio(this->m_ssl, this->m_bio_rd, this->m_bio_wr); // always-on callbacks + // FIXME: consider using on_data as the default always-on event. m_transport->on_read(8192, {this, &TLS_stream::tls_read}); m_transport->on_close({this, &TLS_stream::close_callback_once}); diff --git a/api/net/stream.hpp b/api/net/stream.hpp index 9eec91b4c5..66283975c3 100644 --- a/api/net/stream.hpp +++ b/api/net/stream.hpp @@ -56,13 +56,32 @@ namespace net { /** Called with a shared buffer and the length of the data when received. */ using ReadCallback = delegate; /** - * @brief Event when data is received. + * @brief Event when data is received. Pushes data to the callback. * * @param[in] n The size of the receive buffer * @param[in] cb The read callback */ virtual void on_read(size_t n, ReadCallback cb) = 0; + using DataCallback = delegate; + /** + * @brief Event when data is received. + * Does not push data, just signals its presence. + * + * @param[in] cb The callback + */ + virtual void on_data(DataCallback cb) = 0; + + /** + * @return The size of the next available chunk of data if any. 
+ */ + virtual size_t next_size() = 0; + + /** + * @return The next available chunk of data if any. + */ + virtual buffer_t read_next() = 0; + /** Called with nothing ¯\_(ツ)_/¯ */ using CloseCallback = delegate; /** diff --git a/api/net/tcp/connection.inc b/api/net/tcp/connection.inc index 9cc298136a..cb60015695 100644 --- a/api/net/tcp/connection.inc +++ b/api/net/tcp/connection.inc @@ -47,13 +47,18 @@ inline Connection& Connection::_on_cleanup(CleanupCallback cb) { inline buffer_t Connection::read_next() { static buffer_t empty_buf{}; - if (read_request == nullptr) { + if (UNLIKELY(read_request == nullptr)) { return empty_buf; } return read_request->read_next(); } - +inline size_t Connection::next_size() { + if (UNLIKELY(read_request == nullptr)) { + return 0; + } + return read_request->next_size(); +} inline void Connection::write(const void* buf, size_t n) { this->write(tcp::construct_buffer((uint8_t*) buf, (uint8_t*) buf + n)); diff --git a/api/net/tcp/stream.hpp b/api/net/tcp/stream.hpp index 873cd79a22..908ceb078c 100644 --- a/api/net/tcp/stream.hpp +++ b/api/net/tcp/stream.hpp @@ -50,6 +50,31 @@ namespace net::tcp void on_read(size_t n, ReadCallback cb) override { m_tcp->on_read(n, cb); } + /** + * @brief Event when data is received. + * Does not push data, just signals its presence. + * + * @param[in] cb The callback + */ + void on_data(DataCallback cb) override { + m_tcp->on_data(cb); + }; + + /** + * @return The size of the next available chunk of data if any. + */ + size_t next_size() override { + return m_tcp->next_size(); + }; + + /** + * @return The next available chunk of data if any. + */ + buffer_t read_next() override { + return m_tcp->read_next(); + }; + + /** * @brief Event for when the Stream is being closed. 
* From 3f94857803f5981c6d39f9cd912791ff12088666 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Tue, 8 Jan 2019 17:02:03 +0100 Subject: [PATCH 66/93] tcp: allow buffer reuse and pop completed after pushing to on_read --- src/net/tcp/read_request.cpp | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/net/tcp/read_request.cpp b/src/net/tcp/read_request.cpp index 7dee72d4ef..66f0c44233 100644 --- a/src/net/tcp/read_request.cpp +++ b/src/net/tcp/read_request.cpp @@ -57,8 +57,12 @@ namespace tcp { const auto rem = buf->capacity() - buf->size(); const auto end_seq = buf->end_seq(); // store end_seq if reseted in callback - // Ready buffer for userspace consumption - complete_buffers.push_back(buf->buffer()); + if (on_read_callback != nullptr) { + on_read_callback(buf->buffer()); + } else { + // Ready buffer for read_next + complete_buffers.push_back(buf->buffer()); + } // this is the only one, so we can reuse it if(buffers.size() == 1) @@ -191,14 +195,17 @@ namespace tcp { void Read_request::signal_data() { if (not complete_buffers.empty()) { - if (on_read_callback != nullptr) { - for (auto buf : complete_buffers) { - on_read_callback(buf); - } - } else if (on_data_callback != nullptr){ + if (on_data_callback != nullptr){ on_data_callback(); if (not complete_buffers.empty()) { // FIXME: Make sure this event gets re-triggered + // For now the user will have to make sure to re-read later if they couldn't + } + } else if (on_read_callback != nullptr) { + for (auto buf : complete_buffers) { + // Pop each time, in case callback leads to another call here. 
+ complete_buffers.pop_front(); + on_read_callback(buf); } } } From 7e56f127fa6521b01e3bd9cd4f62134f5fd204c7 Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Tue, 8 Jan 2019 20:57:21 +0100 Subject: [PATCH 67/93] refactor: Moved tls from header to source code --- api/net/openssl/tls_stream.hpp | 258 +---------------------- src/CMakeLists.txt | 1 + src/net/openssl/tls_stream.cpp | 372 +++++++++++++++++++++++++++++++++ 3 files changed, 377 insertions(+), 254 deletions(-) create mode 100644 src/net/openssl/tls_stream.cpp diff --git a/api/net/openssl/tls_stream.hpp b/api/net/openssl/tls_stream.hpp index 6ff53ada1a..4040b55710 100644 --- a/api/net/openssl/tls_stream.hpp +++ b/api/net/openssl/tls_stream.hpp @@ -4,6 +4,7 @@ #include #include +#include //#define VERBOSE_OPENSSL #ifdef VERBOSE_OPENSSL #define TLS_PRINT(fmt, ...) printf(fmt, ##__VA_ARGS__) @@ -79,6 +80,9 @@ namespace openssl private: void tls_read(buffer_t); int tls_perform_stream_write(); + using Alloc = os::mem::buffer::allocator_type; + std::shared_ptr> tls_buffer; + int tls_write_to_stream(Alloc &alloc); int tls_perform_handshake(); bool handshake_completed() const noexcept; void close_callback_once(); @@ -102,258 +106,4 @@ namespace openssl CloseCallback m_on_close = nullptr; }; - inline TLS_stream::TLS_stream(SSL_CTX* ctx, Stream_ptr t, bool outgoing) - : m_transport(std::move(t)) - { - ERR_clear_error(); // prevent old errors from mucking things up - this->m_bio_rd = BIO_new(BIO_s_mem()); - this->m_bio_wr = BIO_new(BIO_s_mem()); - assert(ERR_get_error() == 0 && "Initializing BIOs"); - this->m_ssl = SSL_new(ctx); - assert(this->m_ssl != nullptr); - assert(ERR_get_error() == 0 && "Initializing SSL"); - // TLS server-mode - if (outgoing == false) - SSL_set_accept_state(this->m_ssl); - else - SSL_set_connect_state(this->m_ssl); - - SSL_set_bio(this->m_ssl, this->m_bio_rd, this->m_bio_wr); - // always-on callbacks - m_transport->on_read(8192, {this, &TLS_stream::tls_read}); - 
m_transport->on_close({this, &TLS_stream::close_callback_once}); - - // start TLS handshake process - if (outgoing == true) - { - if (this->tls_perform_handshake() < 0) return; - } - } - inline TLS_stream::TLS_stream(Stream_ptr t, SSL* ssl, BIO* rd, BIO* wr) - : m_transport(std::move(t)), m_ssl(ssl), m_bio_rd(rd), m_bio_wr(wr) - { - // always-on callbacks - m_transport->on_read(8192, {this, &TLS_stream::tls_read}); - m_transport->on_close({this, &TLS_stream::close_callback_once}); - } - inline TLS_stream::~TLS_stream() - { - assert(m_busy == false && "Cannot delete stream while in its call stack"); - SSL_free(this->m_ssl); - } - - inline void TLS_stream::write(buffer_t buffer) - { - if (UNLIKELY(this->is_connected() == false)) { - TLS_PRINT("TLS_stream::write() called on closed stream\n"); - return; - } - - int n = SSL_write(this->m_ssl, buffer->data(), buffer->size()); - auto status = this->status(n); - if (status == STATUS_FAIL) { - this->close(); - return; - } - - do { - n = tls_perform_stream_write(); - } while (n > 0); - } - inline void TLS_stream::write(const std::string& str) - { - write(net::Stream::construct_buffer(str.data(), str.data() + str.size())); - } - inline void TLS_stream::write(const void* data, const size_t len) - { - auto* buf = static_cast (data); - write(net::Stream::construct_buffer(buf, buf + len)); - } - - inline void TLS_stream::tls_read(buffer_t buffer) - { - ERR_clear_error(); - uint8_t* buf = buffer->data(); - int len = buffer->size(); - - while (len > 0) - { - int n = BIO_write(this->m_bio_rd, buf, len); - if (UNLIKELY(n < 0)) { - this->close(); - return; - } - buf += n; - len -= n; - - // if we aren't finished initializing session - if (UNLIKELY(!handshake_completed())) - { - int num = SSL_do_handshake(this->m_ssl); - auto status = this->status(num); - - // OpenSSL wants to write - if (status == STATUS_WANT_IO) - { - tls_perform_stream_write(); - } - else if (status == STATUS_FAIL) - { - if (num < 0) { - 
TLS_PRINT("TLS_stream::SSL_do_handshake() returned %d\n", num); - #ifdef VERBOSE_OPENSSL - ERR_print_errors_fp(stdout); - #endif - } - this->close(); - return; - } - // nothing more to do if still not finished - if (handshake_completed() == false) return; - // handshake success - if (m_on_connect) m_on_connect(*this); - } - - // read decrypted data - do { - char temp[8192]; - n = SSL_read(this->m_ssl, temp, sizeof(temp)); - if (n > 0) { - auto buf = net::Stream::construct_buffer(temp, temp + n); - if (m_on_read) { - this->m_busy = true; - m_on_read(std::move(buf)); - this->m_busy = false; - } - } - } while (n > 0); - // this goes here? - if (UNLIKELY(this->is_closing() || this->is_closed())) { - TLS_PRINT("TLS_stream::SSL_read closed during read\n"); - return; - } - if (this->m_deferred_close) { - this->close(); return; - } - - auto status = this->status(n); - // did peer request stream renegotiation? - if (status == STATUS_WANT_IO) - { - do { - n = tls_perform_stream_write(); - } while (n > 0); - } - else if (status == STATUS_FAIL) - { - this->close(); - return; - } - // check deferred closing - if (this->m_deferred_close) { - this->close(); return; - } - - } // while it < end - } // tls_read() - - inline int TLS_stream::tls_perform_stream_write() - { - ERR_clear_error(); - int pending = BIO_ctrl_pending(this->m_bio_wr); - //printf("pending: %d\n", pending); - if (pending > 0) - { - auto buffer = net::Stream::construct_buffer(pending); - int n = BIO_read(this->m_bio_wr, buffer->data(), buffer->size()); - assert(n == pending); - m_transport->write(buffer); - if (m_on_write) { - this->m_busy = true; - m_on_write(n); - this->m_busy = false; - } - return n; - } - else { - BIO_read(this->m_bio_wr, nullptr, 0); - } - if (!BIO_should_retry(this->m_bio_wr)) - { - this->close(); - return -1; - } - return 0; - } - inline int TLS_stream::tls_perform_handshake() - { - ERR_clear_error(); // prevent old errors from mucking things up - // will return -1:SSL_ERROR_WANT_WRITE - 
int ret = SSL_do_handshake(this->m_ssl); - int n = this->status(ret); - ERR_print_errors_fp(stderr); - if (n == STATUS_WANT_IO) - { - do { - n = tls_perform_stream_write(); - if (n < 0) { - TLS_PRINT("TLS_stream::tls_perform_handshake() stream write failed\n"); - } - } while (n > 0); - return n; - } - else { - TLS_PRINT("TLS_stream::tls_perform_handshake() returned %d\n", ret); - this->close(); - return -1; - } - } - - inline void TLS_stream::close() - { - //ERR_clear_error(); - if (this->m_busy) { - this->m_deferred_close = true; return; - } - CloseCallback func = std::move(this->m_on_close); - this->reset_callbacks(); - if (m_transport->is_connected()) - m_transport->close(); - if (func) func(); - } - inline void TLS_stream::close_callback_once() - { - if (this->m_busy) { - this->m_deferred_close = true; return; - } - CloseCallback func = std::move(this->m_on_close); - this->reset_callbacks(); - if (func) func(); - } - inline void TLS_stream::reset_callbacks() - { - this->m_on_close = nullptr; - this->m_on_connect = nullptr; - this->m_on_read = nullptr; - this->m_on_write = nullptr; - } - - inline bool TLS_stream::handshake_completed() const noexcept - { - return SSL_is_init_finished(this->m_ssl); - } - inline TLS_stream::status_t TLS_stream::status(int n) const noexcept - { - int error = SSL_get_error(this->m_ssl, n); - switch (error) - { - case SSL_ERROR_NONE: - return STATUS_OK; - case SSL_ERROR_WANT_WRITE: - case SSL_ERROR_WANT_READ: - return STATUS_WANT_IO; - default: - return STATUS_FAIL; - } - } } // openssl diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a74cc1c9d2..cc1eddb3ba 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,6 +22,7 @@ include_directories(${OPENSSL_DIR}/include) if(${ARCH} STREQUAL "x86_64") set(OPENSSL_MODULES "net/openssl/init.cpp" "net/openssl/client.cpp" "net/openssl/server.cpp" + "net/openssl/tls_stream.cpp" "net/https/openssl_server.cpp" "net/http/client.cpp") set(OPENSSL_LIBS openssl_ssl openssl_crypto) 
endif() diff --git a/src/net/openssl/tls_stream.cpp b/src/net/openssl/tls_stream.cpp new file mode 100644 index 0000000000..e52381d7a2 --- /dev/null +++ b/src/net/openssl/tls_stream.cpp @@ -0,0 +1,372 @@ + +#include + + +using namespace openssl; + +TLS_stream::TLS_stream(SSL_CTX* ctx, Stream_ptr t, bool outgoing) + : m_transport(std::move(t)) +{ + ERR_clear_error(); // prevent old errors from mucking things up + this->m_bio_rd = BIO_new(BIO_s_mem()); + this->m_bio_wr = BIO_new(BIO_s_mem()); + assert(ERR_get_error() == 0 && "Initializing BIOs"); + this->m_ssl = SSL_new(ctx); + assert(this->m_ssl != nullptr); + assert(ERR_get_error() == 0 && "Initializing SSL"); + // TLS server-mode + if (outgoing == false) + SSL_set_accept_state(this->m_ssl); + else + SSL_set_connect_state(this->m_ssl); + + SSL_set_bio(this->m_ssl, this->m_bio_rd, this->m_bio_wr); + // always-on callbacks + m_transport->on_read(8192, {this, &TLS_stream::tls_read}); + m_transport->on_close({this, &TLS_stream::close_callback_once}); + + // start TLS handshake process + if (outgoing == true) + { + if (this->tls_perform_handshake() < 0) return; + } +} +TLS_stream::TLS_stream(Stream_ptr t, SSL* ssl, BIO* rd, BIO* wr) + : m_transport(std::move(t)), m_ssl(ssl), m_bio_rd(rd), m_bio_wr(wr) +{ + // always-on callbacks + m_transport->on_read(8192, {this, &TLS_stream::tls_read}); + m_transport->on_close({this, &TLS_stream::close_callback_once}); +} +TLS_stream::~TLS_stream() +{ + assert(m_busy == false && "Cannot delete stream while in its call stack"); + SSL_free(this->m_ssl); +} + +void TLS_stream::write(buffer_t buffer) +{ + if (UNLIKELY(this->is_connected() == false)) { + TLS_PRINT("TLS_stream::write() called on closed stream\n"); + return; + } + + int n = SSL_write(this->m_ssl, buffer->data(), buffer->size()); + auto status = this->status(n); + if (status == STATUS_FAIL) { + this->close(); + return; + } + + auto alloc=buffer->get_allocator(); + + + //if stored ptr is nullptr then create it + if 
(UNLIKELY(!tls_buffer)) + { + //perform initial pre alloc quite large + try { + printf("Creating initial pmr buffer size %zu\n",buffer->size()*2); + tls_buffer=std::make_shared>(buffer->size()*2,alloc); + } + catch (std::exception &e) //this is allways a failed to allocate!! + { + //could attempt buffer reuse.. + printf("Failed to allocate to pre buffer\n"); + return; + } + } + + //release memory + buffer->clear(); + //reset ? + //delete buffer; + //first Buffer R belongs to US + + //if shared ptr is unset create initial buffer + //Not sane.. + do { + n = tls_write_to_stream(alloc); + } while (n > 0); +} +void TLS_stream::write(const std::string& str) +{ + write(net::Stream::construct_buffer(str.data(), str.data() + str.size())); +} +void TLS_stream::write(const void* data, const size_t len) +{ + auto* buf = static_cast (data); + write(net::Stream::construct_buffer(buf, buf + len)); +} + +void TLS_stream::tls_read(buffer_t buffer) +{ + ERR_clear_error(); + uint8_t* buf = buffer->data(); + int len = buffer->size(); + + while (len > 0) + { + int n = BIO_write(this->m_bio_rd, buf, len); + if (UNLIKELY(n < 0)) { + this->close(); + return; + } + buf += n; + len -= n; + + // if we aren't finished initializing session + if (UNLIKELY(!handshake_completed())) + { + int num = SSL_do_handshake(this->m_ssl); + auto status = this->status(num); + + // OpenSSL wants to write + if (status == STATUS_WANT_IO) + { + tls_perform_stream_write(); + } + else if (status == STATUS_FAIL) + { + if (num < 0) { + TLS_PRINT("TLS_stream::SSL_do_handshake() returned %d\n", num); + #ifdef VERBOSE_OPENSSL + ERR_print_errors_fp(stdout); + #endif + } + this->close(); + return; + } + // nothing more to do if still not finished + if (handshake_completed() == false) return; + // handshake success + if (m_on_connect) m_on_connect(*this); + } + + // read decrypted data + do { + char temp[8192]; + n = SSL_read(this->m_ssl, temp, sizeof(temp)); + if (n > 0) { + auto buf = net::Stream::construct_buffer(temp, 
temp + n); + if (m_on_read) { + this->m_busy = true; + m_on_read(std::move(buf)); + this->m_busy = false; + } + } + } while (n > 0); + // this goes here? + if (UNLIKELY(this->is_closing() || this->is_closed())) { + TLS_PRINT("TLS_stream::SSL_read closed during read\n"); + return; + } + if (this->m_deferred_close) { + this->close(); return; + } + + auto status = this->status(n); + // did peer request stream renegotiation? + if (status == STATUS_WANT_IO) + { + do { + n = tls_perform_stream_write(); + } while (n > 0); + } + else if (status == STATUS_FAIL) + { + this->close(); + return; + } + // check deferred closing + if (this->m_deferred_close) { + this->close(); return; + } + + } // while it < end +} // tls_read() + + +//TODO pass allocator !! +int TLS_stream::tls_write_to_stream(Alloc &alloc/*buffer_t buffer*/) +{ + ERR_clear_error(); + int pending = BIO_ctrl_pending(this->m_bio_wr); + printf("pending: %d\n", pending); + if (pending > 0) + { + //TODO create a preallocated buffer ? + //this allocates in the buffer.. 
+ //tls_write_to_stream + +// auto buffer = net::Stream::construct_buffer(pending); + //printf("buffer size %zu\n",buffer->size()); + if (pending != tls_buffer->size()) + { + //try catch only when + /*if (UNLIKELY(pending > tls_buffer->capacity())) + { + try + }*/ + //printf("Increasing size of tls_buffer to %zu\n",pending); + try + { + tls_buffer->resize(pending); + } + catch (std::exception &e) + { + //release whats allocated + tls_buffer->clear(); + //set nullptr + tls_buffer=nullptr; + return 0; + } + } + //printf("buffer size %zu\n",tls_buffer->size()); + int n = BIO_read(this->m_bio_wr, tls_buffer->data(), tls_buffer->size()); + assert(n == pending); + //printf("transport write\n"); + m_transport->write(tls_buffer); + + try + { + printf("Assigning new buffer to tls_buffer\n"); + tls_buffer = std::make_shared>(pending,alloc); + } + catch (std::exception &e) + { + printf("Failed to allocate tls_buffer setting shared_ptr to nullptr\n"); + //move problem up the chain by setting the shared ptr to a nullptr + tls_buffer = nullptr;//std::make_shared>(0); + //return 0 + } + + if (m_on_write) { + this->m_busy = true; + m_on_write(n); + this->m_busy = false; + } + return n; + } + else { + BIO_read(this->m_bio_wr, nullptr, 0); + } + if (!BIO_should_retry(this->m_bio_wr)) + { + this->close(); + return -1; + } + return 0; +} + +//When no pmr buffer is passed use malloc +int TLS_stream::tls_perform_stream_write() +{ + ERR_clear_error(); + int pending = BIO_ctrl_pending(this->m_bio_wr); + printf("pending: %d\n", pending); + if (pending > 0) + { + + auto buffer = std::make_shared>(pending);//(std::vector(pending)); + if (buffer->size() < pending) + { + printf("Buffer %zu < pending %zu\n",buffer->size(),pending); + //descope buffer + return 0; + } + //auto buffer = net::Stream::construct_buffer(pending); + printf("buffer size %zu\n",buffer->size()); + int n = BIO_read(this->m_bio_wr, buffer->data(), buffer->size()); + assert(n == pending); + printf("transport write\n"); + 
m_transport->write(buffer); + if (m_on_write) { + this->m_busy = true; + m_on_write(n); + this->m_busy = false; + } + return n; + } + else { + BIO_read(this->m_bio_wr, nullptr, 0); + } + if (!BIO_should_retry(this->m_bio_wr)) + { + this->close(); + return -1; + } + return 0; +} + +int TLS_stream::tls_perform_handshake() +{ + ERR_clear_error(); // prevent old errors from mucking things up + // will return -1:SSL_ERROR_WANT_WRITE + int ret = SSL_do_handshake(this->m_ssl); + int n = this->status(ret); + ERR_print_errors_fp(stderr); + if (n == STATUS_WANT_IO) + { + do { + n = tls_perform_stream_write(); + if (n < 0) { + TLS_PRINT("TLS_stream::tls_perform_handshake() stream write failed\n"); + } + } while (n > 0); + return n; + } + else { + TLS_PRINT("TLS_stream::tls_perform_handshake() returned %d\n", ret); + this->close(); + return -1; + } +} + +void TLS_stream::close() +{ + //ERR_clear_error(); + if (this->m_busy) { + this->m_deferred_close = true; return; + } + CloseCallback func = std::move(this->m_on_close); + this->reset_callbacks(); + if (m_transport->is_connected()) + m_transport->close(); + if (func) func(); +} +void TLS_stream::close_callback_once() +{ + if (this->m_busy) { + this->m_deferred_close = true; return; + } + CloseCallback func = std::move(this->m_on_close); + this->reset_callbacks(); + if (func) func(); +} +void TLS_stream::reset_callbacks() +{ + this->m_on_close = nullptr; + this->m_on_connect = nullptr; + this->m_on_read = nullptr; + this->m_on_write = nullptr; +} + +bool TLS_stream::handshake_completed() const noexcept +{ + return SSL_is_init_finished(this->m_ssl); +} +TLS_stream::status_t TLS_stream::status(int n) const noexcept +{ + int error = SSL_get_error(this->m_ssl, n); + switch (error) + { + case SSL_ERROR_NONE: + return STATUS_OK; + case SSL_ERROR_WANT_WRITE: + case SSL_ERROR_WANT_READ: + return STATUS_WANT_IO; + default: + return STATUS_FAIL; + } +} From 7ec4f5b0b7c168e738f64172a7cc1feca5926de8 Mon Sep 17 00:00:00 2001 From: Kristian 
Jerpetjoen Date: Thu, 10 Jan 2019 15:26:17 +0100 Subject: [PATCH 68/93] 0.13.x: Created StreamBuffer to wrap Stream and refactor out parts of tls_stream and moved from on_read to also supporting on_data and reduced microlb --- api/net/openssl/tls_stream.hpp | 45 +---- api/net/stream_buffer.hpp | 173 +++++++++++++++++ lib/microLB/micro_lb/balancer.cpp | 58 +++--- lib/microLB/micro_lb/balancer.hpp | 6 +- src/net/openssl/tls_stream.cpp | 301 ++++++++++++------------------ 5 files changed, 329 insertions(+), 254 deletions(-) create mode 100644 api/net/stream_buffer.hpp diff --git a/api/net/openssl/tls_stream.hpp b/api/net/openssl/tls_stream.hpp index 124273ac7d..5d0d6d0b7d 100644 --- a/api/net/openssl/tls_stream.hpp +++ b/api/net/openssl/tls_stream.hpp @@ -2,18 +2,18 @@ #include #include #include -#include +#include -//#define VERBOSE_OPENSSL +//#define VERBOSE_OPENSSL 0 #ifdef VERBOSE_OPENSSL -#define TLS_PRINT(fmt, ...) printf(fmt, ##__VA_ARGS__) +#define TLS_PRINT(fmt, ...) printf("TLS_Stream");printf(fmt, ##__VA_ARGS__) #else #define TLS_PRINT(fmt, ...) 
/* fmt */ #endif namespace openssl { - struct TLS_stream : public net::Stream + struct TLS_stream : public net::StreamBuffer { using Stream_ptr = net::Stream_ptr; @@ -25,7 +25,6 @@ namespace openssl void write(const std::string&) override; void write(const void* buf, size_t n) override; void close() override; - void reset_callbacks() override; net::Socket local() const override { return m_transport->local(); @@ -37,35 +36,11 @@ namespace openssl return m_transport->to_string(); } - void on_connect(ConnectCallback cb) override { - m_on_connect = std::move(cb); - } - void on_read(size_t, ReadCallback cb) override { - m_on_read = std::move(cb); - } - void on_data(DataCallback cb) override { - m_on_data = std::move(cb); - } - size_t next_size() override { - // FIXME: implement buffering for read_next - return 0; - } - buffer_t read_next() override { - // FIXME: implement buffering for read_next - return{}; - } - void on_close(CloseCallback cb) override { - m_on_close = std::move(cb); - } - void on_write(WriteCallback cb) override { - m_on_write = std::move(cb); - } - bool is_connected() const noexcept override { return handshake_completed() && m_transport->is_connected(); } bool is_writable() const noexcept override { - return is_connected() && m_transport->is_writable(); + return (not write_congested()) && is_connected() && m_transport->is_writable(); } bool is_readable() const noexcept override { return m_transport->is_readable(); @@ -88,6 +63,10 @@ namespace openssl size_t serialize_to(void*) const override; private: + void data(); + int decrypt(const void *data,int size); + int send_decrypted(); + void tls_read(buffer_t); int tls_perform_stream_write(); int tls_perform_handshake(); @@ -100,18 +79,12 @@ namespace openssl STATUS_FAIL }; status_t status(int n) const noexcept; - Stream_ptr m_transport = nullptr; SSL* m_ssl = nullptr; BIO* m_bio_rd = nullptr; BIO* m_bio_wr = nullptr; bool m_busy = false; bool m_deferred_close = false; - ConnectCallback m_on_connect = 
nullptr; - ReadCallback m_on_read = nullptr; - DataCallback m_on_data = nullptr; - WriteCallback m_on_write = nullptr; - CloseCallback m_on_close = nullptr; }; } // openssl diff --git a/api/net/stream_buffer.hpp b/api/net/stream_buffer.hpp new file mode 100644 index 0000000000..c26664859f --- /dev/null +++ b/api/net/stream_buffer.hpp @@ -0,0 +1,173 @@ +#ifndef STREAMBUFFERR_HPP +#define STREAMBUFFERR_HPP +#include +#include +namespace net { + class StreamBuffer : public net::Stream + { + public: + using buffer_t = os::mem::buf_ptr; + using Ready_queue = std::deque; + //virtual ~StreamBuffer(); + + void on_connect(ConnectCallback cb) override { + m_on_connect = std::move(cb); + } + + void on_read(size_t, ReadCallback cb) override { + m_on_read = std::move(cb); + } + void on_data(DataCallback cb) override { + m_on_data = std::move(cb); + } + size_t next_size() override; + + buffer_t read_next() override; + + void on_close(CloseCallback cb) override { + m_on_close = std::move(cb); + } + void on_write(WriteCallback cb) override { + m_on_write = std::move(cb); + } + + void signal_data(); + + bool read_congested() const noexcept + { return m_read_congested; } + + bool write_congested() const noexcept + { return m_write_congested; } + + /** + * @brief Construct a shared read vector used by streams + * If allocation failed congestion flag is set + * + * @param construction parameters + * + * @return nullptr on failure, shared_ptr to buffer on success + */ + template + buffer_t construct_read_buffer(Args&&... args) + { + return construct_buffer_with_flag(m_read_congested,std::forward (args)...); + } + + /** + * @brief Construct a shared write vector used by streams + * If allocation failed congestion flag is set + * + * @param construction parameters + * + * @return nullptr on failure, shared_ptr to buffer on success + */ + template + buffer_t construct_write_buffer(Args&&... 
args) + { + return construct_buffer_with_flag(m_write_congested,std::forward (args)...); + } + + protected: + void closed() + { if (m_on_close) m_on_close(); } + void connected() + { if (m_on_connect) m_on_connect(*this); } + void stream_on_write(int n) + { if (m_on_write) m_on_write(n); } + void enqueue_data(buffer_t data) + { m_send_buffers.push_back(data); } + + CloseCallback getCloseCallback() { return std::move(this->m_on_close); } + + void reset_callbacks() override + { + //remove queue and reset congestion flags and busy flag ?? + this->m_on_close = nullptr; + this->m_on_connect = nullptr; + this->m_on_read = nullptr; + this->m_on_write = nullptr; + this->m_on_data = nullptr; + } + private: + + bool m_write_congested= false; + bool m_read_congested = false; + + ConnectCallback m_on_connect = nullptr; + ReadCallback m_on_read = nullptr; + DataCallback m_on_data = nullptr; + WriteCallback m_on_write = nullptr; + CloseCallback m_on_close = nullptr; + Ready_queue m_send_buffers; + + /** + * @brief Construct a shared vector and set congestion flag if allocation fails + * + * @param flag the flag to set true or false on allocation failure + * @param args arguments to constructing the buffer + * @return nullptr on failure , shared pointer to buffer on success + */ + + template + buffer_t construct_buffer_with_flag(bool &flag,Args&&... 
args) + { + buffer_t buffer; + try + { + buffer = std::make_shared (std::forward (args)...); + flag = false; + } + catch (std::exception &e) + { + flag = true; + return nullptr; + } + return buffer; + } + + + }; // < class StreamBuffer + + inline size_t StreamBuffer::next_size() + { + if (not m_send_buffers.empty()) { + return m_send_buffers.front()->size(); + } + return 0; + } + + inline StreamBuffer::buffer_t StreamBuffer::read_next() + { + + if (not m_send_buffers.empty()) { + auto buf = m_send_buffers.front(); + m_send_buffers.pop_front(); + return buf; + } + return nullptr; + } + + inline void StreamBuffer::signal_data() + { + if (not m_send_buffers.empty()) + { + if (m_on_data != nullptr){ + //on_data_callback(); + m_on_data(); + if (not m_send_buffers.empty()) { + // FIXME: Make sure this event gets re-triggered + // For now the user will have to make sure to re-read later if they couldn't + } + } + else if (m_on_read != nullptr) + { + for (auto buf : m_send_buffers) { + // Pop each time, in case callback leads to another call here. + m_send_buffers.pop_front(); + m_on_read(buf); + } + } + } + } +} // namespace net +#endif // STREAMBUFFERR_HPP diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index 317e55809a..b9e8cefa79 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -1,8 +1,6 @@ #include "balancer.hpp" #include -#define READQ_PER_CLIENT 4096 -#define READQ_FOR_NODES 8192 #define MAX_OUTGOING_ATTEMPTS 100 // checking if nodes are dead or not #define ACTIVE_INITIAL_PERIOD 8s @@ -13,7 +11,7 @@ #define LB_VERBOSE 0 #if LB_VERBOSE -#define LBOUT(fmt, ...) printf(fmt, ##__VA_ARGS__) +#define LBOUT(fmt, ...) printf("MICROLB: "); printf(fmt, ##__VA_ARGS__) #else #define LBOUT(fmt, ...) 
/** **/ #endif @@ -75,7 +73,7 @@ namespace microLB if (client.conn->is_connected()) { // NOTE: explicitly want to copy buffers net::Stream_ptr rval = - nodes.assign(std::move(client.conn), client.readq); + nodes.assign(std::move(client.conn)); if (rval == nullptr) { // done with this queue item queue.pop_front(); @@ -143,20 +141,11 @@ namespace microLB // Release connection if it closes before it's assigned to a node. this->conn->on_close([this](){ + printf("Waiting issuing close\n"); if (this->conn != nullptr) this->conn->reset_callbacks(); this->conn = nullptr; }); - - // queue incoming data from clients not yet - // assigned to a node - this->conn->on_read(READQ_PER_CLIENT, - [this] (auto buf) { - // prevent buffer bloat attack - this->total += buf->size(); - LBOUT("*** Queued %lu bytes\n", buf->size()); - readq.push_back(buf); - }); } void Nodes::create_connections(int total) @@ -189,7 +178,7 @@ namespace microLB } } } - net::Stream_ptr Nodes::assign(net::Stream_ptr conn, queue_vector_t& readq) + net::Stream_ptr Nodes::assign(net::Stream_ptr conn) { for (size_t i = 0; i < nodes.size(); i++) { @@ -202,11 +191,7 @@ namespace microLB assert(outgoing->is_connected()); LBOUT("Assigning client to node %d (%s)\n", algo_iterator, outgoing->to_string().c_str()); - // flush readq to outgoing before creating session - for (auto buffer : readq) { - LBOUT("*** Flushing %lu bytes\n", buffer->size()); - outgoing->write(buffer); - } + //Should we some way hold track of the session object ? 
auto& session = this->create_session( std::move(conn), std::move(outgoing)); @@ -362,6 +347,7 @@ namespace microLB int fail_timer = Timers::oneshot(CONNECT_TIMEOUT, [this, outgoing] (int) { + printf("Fail timer\n"); // close connection outgoing->abort(); // no longer connecting @@ -403,8 +389,14 @@ namespace microLB auto conn = std::move(pool.back()); assert(conn != nullptr); pool.pop_back(); - if (conn->is_connected()) return conn; - else conn->close(); + if (conn->is_connected()) { + return conn; + } + else + { + printf("CLOSING SINCE conn->connected is false\n"); + conn->close(); + } } return nullptr; } @@ -415,19 +407,25 @@ namespace microLB : parent(n), self(idx), incoming(std::move(inc)), outgoing(std::move(out)) { - incoming->on_read(READQ_PER_CLIENT, - [this] (auto buf) { - assert(this->is_alive()); - this->outgoing->write(buf); + + incoming->on_data([this]() { + assert(this->is_alive()); + while((this->incoming->next_size() > 0) and this->outgoing->is_writable()) + { + this->outgoing->write(this->incoming->read_next()); + } }); incoming->on_close( [&nodes = n, idx] () { nodes.close_session(idx); }); - outgoing->on_read(READQ_FOR_NODES, - [this] (auto buf) { - assert(this->is_alive()); - this->incoming->write(buf); + + outgoing->on_data([this]() { + assert(this->is_alive()); + while((this->outgoing->next_size() > 0) and this->incoming->is_writable()) + { + this->incoming->write(this->outgoing->read_next()); + } }); outgoing->on_close( [&nodes = n, idx] () { diff --git a/lib/microLB/micro_lb/balancer.hpp b/lib/microLB/micro_lb/balancer.hpp index 8fb095b320..924a5412cf 100644 --- a/lib/microLB/micro_lb/balancer.hpp +++ b/lib/microLB/micro_lb/balancer.hpp @@ -6,7 +6,6 @@ namespace microLB { typedef net::Inet netstack_t; typedef net::tcp::Connection_ptr tcp_ptr; - typedef std::vector queue_vector_t; typedef delegate pool_signal_t; struct Waiting { @@ -15,7 +14,6 @@ namespace microLB void serialize(liu::Storage&); net::Stream_ptr conn; - queue_vector_t readq; 
int total = 0; }; @@ -37,7 +35,7 @@ namespace microLB auto address() const noexcept { return this->addr; } int connection_attempts() const noexcept { return this->connecting; } int pool_size() const noexcept { return pool.size(); } - bool is_active() const noexcept { return active; }; + bool is_active() const noexcept { return active; } bool active_check() const noexcept { return do_active_check; } void restart_active_check(); @@ -77,7 +75,7 @@ namespace microLB void add_node(Args&&... args); void create_connections(int total); // returns the connection back if the operation fails - net::Stream_ptr assign(net::Stream_ptr, queue_vector_t&); + net::Stream_ptr assign(net::Stream_ptr); Session& create_session(net::Stream_ptr inc, net::Stream_ptr out); void close_session(int); Session& get_session(int); diff --git a/src/net/openssl/tls_stream.cpp b/src/net/openssl/tls_stream.cpp index e52381d7a2..625b0aee88 100644 --- a/src/net/openssl/tls_stream.cpp +++ b/src/net/openssl/tls_stream.cpp @@ -1,7 +1,5 @@ - #include - using namespace openssl; TLS_stream::TLS_stream(SSL_CTX* ctx, Stream_ptr t, bool outgoing) @@ -21,8 +19,9 @@ TLS_stream::TLS_stream(SSL_CTX* ctx, Stream_ptr t, bool outgoing) SSL_set_connect_state(this->m_ssl); SSL_set_bio(this->m_ssl, this->m_bio_rd, this->m_bio_wr); + // always-on callbacks - m_transport->on_read(8192, {this, &TLS_stream::tls_read}); + m_transport->on_data({this,&TLS_stream::data}); m_transport->on_close({this, &TLS_stream::close_callback_once}); // start TLS handshake process @@ -35,7 +34,7 @@ TLS_stream::TLS_stream(Stream_ptr t, SSL* ssl, BIO* rd, BIO* wr) : m_transport(std::move(t)), m_ssl(ssl), m_bio_rd(rd), m_bio_wr(wr) { // always-on callbacks - m_transport->on_read(8192, {this, &TLS_stream::tls_read}); + m_transport->on_data({this, &TLS_stream::data}); m_transport->on_close({this, &TLS_stream::close_callback_once}); } TLS_stream::~TLS_stream() @@ -47,252 +46,191 @@ TLS_stream::~TLS_stream() void TLS_stream::write(buffer_t buffer) { 
if (UNLIKELY(this->is_connected() == false)) { - TLS_PRINT("TLS_stream::write() called on closed stream\n"); + TLS_PRINT("::write() called on closed stream\n"); return; } - int n = SSL_write(this->m_ssl, buffer->data(), buffer->size()); auto status = this->status(n); if (status == STATUS_FAIL) { + TLS_PRINT("::write() Fail status %d\n",n); this->close(); return; } - auto alloc=buffer->get_allocator(); + do { + n = tls_perform_stream_write(); + } while (n > 0); +} + +void TLS_stream::write(const std::string& str) +{ + write(net::Stream::construct_buffer(str.data(), str.data() + str.size())); +} + +void TLS_stream::write(const void* data, const size_t len) +{ + auto* buf = static_cast (data); + write(net::Stream::construct_buffer(buf, buf + len)); +} +int TLS_stream::decrypt(const void *indata, int size) +{ + int n = BIO_write(this->m_bio_rd, indata, size); + if (UNLIKELY(n < 0)) { + //TODO can we handle this more gracefully? + TLS_PRINT("BIO_write failed\n"); + this->close(); + return 0; + } - //if stored ptr is nullptr then create it - if (UNLIKELY(!tls_buffer)) + // if we aren't finished initializing session + if (UNLIKELY(!handshake_completed())) { - //perform initial pre alloc quite large - try { - printf("Creating initial pmr buffer size %zu\n",buffer->size()*2); - tls_buffer=std::make_shared>(buffer->size()*2,alloc); + int num = SSL_do_handshake(this->m_ssl); + auto status = this->status(num); + + // OpenSSL wants to write + if (status == STATUS_WANT_IO) + { + tls_perform_stream_write(); } - catch (std::exception &e) //this is allways a failed to allocate!! + else if (status == STATUS_FAIL) { - //could attempt buffer reuse.. 
- printf("Failed to allocate to pre buffer\n"); - return; + if (num < 0) { + TLS_PRINT("TLS_stream::SSL_do_handshake() returned %d\n", num); + #ifdef VERBOSE_OPENSSL + ERR_print_errors_fp(stdout); + #endif + } + this->close(); + return 0; } + // nothing more to do if still not finished + if (handshake_completed() == false) return 0; + // handshake success + connected(); } + return n; +} - //release memory - buffer->clear(); - //reset ? - //delete buffer; - //first Buffer R belongs to US - - //if shared ptr is unset create initial buffer - //Not sane.. +int TLS_stream::send_decrypted() +{ + int n; + buffer_t buffer; + // read decrypted data do { - n = tls_write_to_stream(alloc); + //TODO "increase the size ?") + auto buffer=StreamBuffer::construct_read_buffer(8192); + if (!buffer) return 0; + n = SSL_read(this->m_ssl,buffer->data(),buffer->size()); + if (n > 0) { + buffer->resize(n); + enqueue_data(buffer); + // m_receive_buffers.push_back(buffer); + } } while (n > 0); + return n; } -void TLS_stream::write(const std::string& str) -{ - write(net::Stream::construct_buffer(str.data(), str.data() + str.size())); -} -void TLS_stream::write(const void* data, const size_t len) + +void TLS_stream::data() { - auto* buf = static_cast (data); - write(net::Stream::construct_buffer(buf, buf + len)); + buffer_t buf; + while ((not read_congested() && (buf=m_transport->read_next()) != nullptr)) + { + TLS_PRINT("::data() Received %lu bytes\n",buf->size()); + tls_read(buf); + } } void TLS_stream::tls_read(buffer_t buffer) { ERR_clear_error(); - uint8_t* buf = buffer->data(); + uint8_t* buf_ptr = buffer->data(); int len = buffer->size(); while (len > 0) { - int n = BIO_write(this->m_bio_rd, buf, len); - if (UNLIKELY(n < 0)) { - this->close(); - return; - } - buf += n; - len -= n; + int decrypted_bytes=decrypt(buf_ptr,len); + if (UNLIKELY(decrypted_bytes==0)) return; + buf_ptr += decrypted_bytes; + len -= decrypted_bytes; - // if we aren't finished initializing session - if 
(UNLIKELY(!handshake_completed())) - { - int num = SSL_do_handshake(this->m_ssl); - auto status = this->status(num); + int ret=send_decrypted(); - // OpenSSL wants to write - if (status == STATUS_WANT_IO) - { - tls_perform_stream_write(); - } - else if (status == STATUS_FAIL) - { - if (num < 0) { - TLS_PRINT("TLS_stream::SSL_do_handshake() returned %d\n", num); - #ifdef VERBOSE_OPENSSL - ERR_print_errors_fp(stdout); - #endif - } - this->close(); - return; - } - // nothing more to do if still not finished - if (handshake_completed() == false) return; - // handshake success - if (m_on_connect) m_on_connect(*this); - } - // read decrypted data - do { - char temp[8192]; - n = SSL_read(this->m_ssl, temp, sizeof(temp)); - if (n > 0) { - auto buf = net::Stream::construct_buffer(temp, temp + n); - if (m_on_read) { - this->m_busy = true; - m_on_read(std::move(buf)); - this->m_busy = false; - } - } - } while (n > 0); // this goes here? if (UNLIKELY(this->is_closing() || this->is_closed())) { TLS_PRINT("TLS_stream::SSL_read closed during read\n"); return; } if (this->m_deferred_close) { - this->close(); return; + TLS_PRINT("::read() close on m_deferred_close"); + this->close(); + return; } - auto status = this->status(n); + auto status = this->status(ret); // did peer request stream renegotiation? if (status == STATUS_WANT_IO) { + TLS_PRINT("::read() STATUS_WANT_IO\n"); + int ret; do { - n = tls_perform_stream_write(); - } while (n > 0); + ret = tls_perform_stream_write(); + } while (ret > 0); } else if (status == STATUS_FAIL) { + TLS_PRINT("::read() close on STATUS_FAIL after tls_perform_stream_write\n"); this->close(); return; } // check deferred closing if (this->m_deferred_close) { + TLS_PRINT("::read() close on m_deferred_close after tls_perform_stream_write\n"); this->close(); return; } - } // while it < end -} // tls_read() + //forward data + this->m_busy=true; + signal_data(); + this->m_busy=false; +} // tls_read() -//TODO pass allocator !! 
-int TLS_stream::tls_write_to_stream(Alloc &alloc/*buffer_t buffer*/) +int TLS_stream::tls_perform_stream_write() { ERR_clear_error(); int pending = BIO_ctrl_pending(this->m_bio_wr); - printf("pending: %d\n", pending); if (pending > 0) { - //TODO create a preallocated buffer ? - //this allocates in the buffer.. - //tls_write_to_stream - -// auto buffer = net::Stream::construct_buffer(pending); - //printf("buffer size %zu\n",buffer->size()); - if (pending != tls_buffer->size()) - { - //try catch only when - /*if (UNLIKELY(pending > tls_buffer->capacity())) - { - try - }*/ - //printf("Increasing size of tls_buffer to %zu\n",pending); - try - { - tls_buffer->resize(pending); - } - catch (std::exception &e) - { - //release whats allocated - tls_buffer->clear(); - //set nullptr - tls_buffer=nullptr; - return 0; - } + auto buffer= net::StreamBuffer::construct_write_buffer(pending); + if (buffer == nullptr) { + printf("Failed to construct buffer\n"); + return 0; } - //printf("buffer size %zu\n",tls_buffer->size()); - int n = BIO_read(this->m_bio_wr, tls_buffer->data(), tls_buffer->size()); + int n = BIO_read(this->m_bio_wr, buffer->data(), buffer->size()); assert(n == pending); - //printf("transport write\n"); - m_transport->write(tls_buffer); - - try + //What if we cant write.. 
+ if (m_transport->is_writable()) { - printf("Assigning new buffer to tls_buffer\n"); - tls_buffer = std::make_shared>(pending,alloc); - } - catch (std::exception &e) - { - printf("Failed to allocate tls_buffer setting shared_ptr to nullptr\n"); - //move problem up the chain by setting the shared ptr to a nullptr - tls_buffer = nullptr;//std::make_shared>(0); - //return 0 - } + m_transport->write(buffer); - if (m_on_write) { this->m_busy = true; - m_on_write(n); + stream_on_write(n); this->m_busy = false; } - return n; - } - else { - BIO_read(this->m_bio_wr, nullptr, 0); - } - if (!BIO_should_retry(this->m_bio_wr)) - { - this->close(); - return -1; - } - return 0; -} - -//When no pmr buffer is passed use malloc -int TLS_stream::tls_perform_stream_write() -{ - ERR_clear_error(); - int pending = BIO_ctrl_pending(this->m_bio_wr); - printf("pending: %d\n", pending); - if (pending > 0) - { - - auto buffer = std::make_shared>(pending);//(std::vector(pending)); - if (buffer->size() < pending) + if (UNLIKELY((pending = BIO_ctrl_pending(this->m_bio_wr)) > 0)) { - printf("Buffer %zu < pending %zu\n",buffer->size(),pending); - //descope buffer - return 0; - } - //auto buffer = net::Stream::construct_buffer(pending); - printf("buffer size %zu\n",buffer->size()); - int n = BIO_read(this->m_bio_wr, buffer->data(), buffer->size()); - assert(n == pending); - printf("transport write\n"); - m_transport->write(buffer); - if (m_on_write) { - this->m_busy = true; - m_on_write(n); - this->m_busy = false; + return pending; } - return n; - } - else { - BIO_read(this->m_bio_wr, nullptr, 0); + return 0; } + + BIO_read(this->m_bio_wr, nullptr, 0); if (!BIO_should_retry(this->m_bio_wr)) { + TLS_PRINT("::tls_perform_stream_write() close on !BIO_should_retry\n"); this->close(); return -1; } @@ -325,11 +263,12 @@ int TLS_stream::tls_perform_handshake() void TLS_stream::close() { + TLS_PRINT("TLS_stream::close()\n"); //ERR_clear_error(); if (this->m_busy) { this->m_deferred_close = true; return; 
} - CloseCallback func = std::move(this->m_on_close); + CloseCallback func = getCloseCallback(); this->reset_callbacks(); if (m_transport->is_connected()) m_transport->close(); @@ -337,20 +276,14 @@ void TLS_stream::close() } void TLS_stream::close_callback_once() { + TLS_PRINT("TLS_stream::close_callback_once() \n"); if (this->m_busy) { this->m_deferred_close = true; return; } - CloseCallback func = std::move(this->m_on_close); + CloseCallback func = getCloseCallback(); this->reset_callbacks(); if (func) func(); } -void TLS_stream::reset_callbacks() -{ - this->m_on_close = nullptr; - this->m_on_connect = nullptr; - this->m_on_read = nullptr; - this->m_on_write = nullptr; -} bool TLS_stream::handshake_completed() const noexcept { From 1b875190e0efbcb72029ee4bb319327ff87b4be4 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Fri, 11 Jan 2019 09:43:52 +0100 Subject: [PATCH 69/93] pmr: make default resource throw on failure --- api/util/alloc_pmr.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/api/util/alloc_pmr.hpp b/api/util/alloc_pmr.hpp index 8cbe54c98f..e676dc2326 100644 --- a/api/util/alloc_pmr.hpp +++ b/api/util/alloc_pmr.hpp @@ -106,7 +106,10 @@ namespace os::mem { struct Default_pmr : public std::pmr::memory_resource { void* do_allocate(std::size_t size, std::size_t align) override { - return memalign(align, size); + auto* res = memalign(align, size); + if (res == nullptr) + throw std::bad_alloc(); + return res; } void do_deallocate (void* ptr, size_t, size_t) override { From 4317157a405300a339ab54856593793134c173f2 Mon Sep 17 00:00:00 2001 From: Alfred Bratterud Date: Fri, 11 Jan 2019 10:13:17 +0100 Subject: [PATCH 70/93] typo: violated address --- src/platform/x86_pc/idt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/platform/x86_pc/idt.cpp b/src/platform/x86_pc/idt.cpp index 4b979eb1c7..50904bf724 100644 --- a/src/platform/x86_pc/idt.cpp +++ b/src/platform/x86_pc/idt.cpp @@ -304,7 +304,7 @@ void 
__page_fault(uintptr_t* regs, uint32_t code) { auto& range = OS::memory_map().at(key); printf("Violated address is in mapped range \"%s\" \n", range.name()); } else { - printf("Violated ddress is outside mapped memory\n"); + printf("Violated address is outside mapped memory\n"); } } From 6a95e5158f843f76c5af716aee36d4b8d5fff4da Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Fri, 11 Jan 2019 10:48:09 +0100 Subject: [PATCH 71/93] microlb: Timer event to handle congestion --- api/net/openssl/tls_stream.hpp | 17 ++++++++++- api/net/stream_buffer.hpp | 54 +++++++++++++++++++++++++++++----- src/net/openssl/tls_stream.cpp | 40 ++++++++++++++++++++----- 3 files changed, 95 insertions(+), 16 deletions(-) diff --git a/api/net/openssl/tls_stream.hpp b/api/net/openssl/tls_stream.hpp index 5d0d6d0b7d..f4f3613785 100644 --- a/api/net/openssl/tls_stream.hpp +++ b/api/net/openssl/tls_stream.hpp @@ -62,8 +62,10 @@ namespace openssl size_t serialize_to(void*) const override; + void handle_read_congestion() override; + void handle_write_congestion() override; private: - void data(); + void handle_data(); int decrypt(const void *data,int size); int send_decrypted(); @@ -73,6 +75,19 @@ namespace openssl bool handshake_completed() const noexcept; void close_callback_once(); + //using Alloc = os::mem::buffer::allocator_type; + //deref a nullptr seems scary to me + /*Alloc &getAllocator() { return *allocator; } + //store ptr to allocator + void setAllocator(Alloc &alloc) + { + if (*allocator != alloc) + { + allocator=&alloc; + } + }*/ + //Alloc *allocator=nullptr; + buffer_t last_buffer; enum status_t { STATUS_OK, STATUS_WANT_IO, diff --git a/api/net/stream_buffer.hpp b/api/net/stream_buffer.hpp index c26664859f..5f8a8e202f 100644 --- a/api/net/stream_buffer.hpp +++ b/api/net/stream_buffer.hpp @@ -2,13 +2,19 @@ #define STREAMBUFFERR_HPP #include #include +#include + namespace net { class StreamBuffer : public net::Stream { public: + StreamBuffer(Timers::duration_t 
timeout=std::chrono::microseconds(10)) + : timer({this,&StreamBuffer::congested}),congestion_timeout(timeout) {} using buffer_t = os::mem::buf_ptr; using Ready_queue = std::deque; - //virtual ~StreamBuffer(); + virtual ~StreamBuffer() { + timer.stop(); + } void on_connect(ConnectCallback cb) override { m_on_connect = std::move(cb); @@ -67,6 +73,8 @@ namespace net { return construct_buffer_with_flag(m_write_congested,std::forward (args)...); } + virtual void handle_read_congestion() = 0; + virtual void handle_write_congestion() = 0; protected: void closed() { if (m_on_close) m_on_close(); } @@ -77,6 +85,8 @@ namespace net { void enqueue_data(buffer_t data) { m_send_buffers.push_back(data); } + void congested(); + CloseCallback getCloseCallback() { return std::move(this->m_on_close); } void reset_callbacks() override @@ -88,8 +98,10 @@ namespace net { this->m_on_write = nullptr; this->m_on_data = nullptr; } - private: + Timer timer; + private: + Timer::duration_t congestion_timeout; bool m_write_congested= false; bool m_read_congested = false; @@ -111,15 +123,16 @@ namespace net { template buffer_t construct_buffer_with_flag(bool &flag,Args&&... 
args) { - buffer_t buffer; + static buffer_t buffer; try { - buffer = std::make_shared (std::forward (args)...); + buffer = std::make_shared(std::forward (args)...); flag = false; } - catch (std::exception &e) + catch (std::bad_alloc &e) { flag = true; + timer.start(congestion_timeout); return nullptr; } return buffer; @@ -147,6 +160,33 @@ namespace net { return nullptr; } + inline void StreamBuffer::congested() + { + if (m_read_congested) + { + handle_read_congestion(); + } + if (m_write_congested) + { + handle_write_congestion(); + } + //if any of the congestion states are still active make sure the timer is running + if(m_read_congested or m_write_congested) + { + if (!timer.is_running()) + { + timer.start(congestion_timeout); + } + } + else + { + if (timer.is_running()) + { + timer.stop(); + } + } + } + inline void StreamBuffer::signal_data() { if (not m_send_buffers.empty()) @@ -155,8 +195,8 @@ namespace net { //on_data_callback(); m_on_data(); if (not m_send_buffers.empty()) { - // FIXME: Make sure this event gets re-triggered - // For now the user will have to make sure to re-read later if they couldn't + m_read_congested=true; + timer.start(congestion_timeout); } } else if (m_on_read != nullptr) diff --git a/src/net/openssl/tls_stream.cpp b/src/net/openssl/tls_stream.cpp index 625b0aee88..6391da6da1 100644 --- a/src/net/openssl/tls_stream.cpp +++ b/src/net/openssl/tls_stream.cpp @@ -21,7 +21,7 @@ TLS_stream::TLS_stream(SSL_CTX* ctx, Stream_ptr t, bool outgoing) SSL_set_bio(this->m_ssl, this->m_bio_rd, this->m_bio_wr); // always-on callbacks - m_transport->on_data({this,&TLS_stream::data}); + m_transport->on_data({this,&TLS_stream::handle_data}); m_transport->on_close({this, &TLS_stream::close_callback_once}); // start TLS handshake process @@ -29,13 +29,15 @@ TLS_stream::TLS_stream(SSL_CTX* ctx, Stream_ptr t, bool outgoing) { if (this->tls_perform_handshake() < 0) return; } + last_buffer=std::make_shared>(); } TLS_stream::TLS_stream(Stream_ptr t, SSL* ssl, 
BIO* rd, BIO* wr) : m_transport(std::move(t)), m_ssl(ssl), m_bio_rd(rd), m_bio_wr(wr) { // always-on callbacks - m_transport->on_data({this, &TLS_stream::data}); + m_transport->on_data({this, &TLS_stream::handle_data}); m_transport->on_close({this, &TLS_stream::close_callback_once}); + last_buffer=std::make_shared>(); } TLS_stream::~TLS_stream() { @@ -45,6 +47,12 @@ TLS_stream::~TLS_stream() void TLS_stream::write(buffer_t buffer) { + //last_buffer=buffer; + //allocator=&buffer->get_allocator(); + /*if (UNLIKELY(allocator == nullptr)) + { + + }*/ if (UNLIKELY(this->is_connected() == false)) { TLS_PRINT("::write() called on closed stream\n"); return; @@ -64,13 +72,15 @@ void TLS_stream::write(buffer_t buffer) void TLS_stream::write(const std::string& str) { - write(net::Stream::construct_buffer(str.data(), str.data() + str.size())); + //TODO handle failed alloc + write(net::StreamBuffer::construct_write_buffer(str.data(),str.data()+str.size(),last_buffer->get_allocator())); } void TLS_stream::write(const void* data, const size_t len) { + //TODO handle failed alloc auto* buf = static_cast (data); - write(net::Stream::construct_buffer(buf, buf + len)); + write(net::StreamBuffer::construct_write_buffer(buf, buf + len,last_buffer->get_allocator())); } int TLS_stream::decrypt(const void *indata, int size) @@ -120,7 +130,7 @@ int TLS_stream::send_decrypted() // read decrypted data do { //TODO "increase the size ?") - auto buffer=StreamBuffer::construct_read_buffer(8192); + auto buffer=StreamBuffer::construct_read_buffer(8192,last_buffer->get_allocator()); if (!buffer) return 0; n = SSL_read(this->m_ssl,buffer->data(),buffer->size()); if (n > 0) { @@ -132,9 +142,21 @@ int TLS_stream::send_decrypted() return n; } -void TLS_stream::data() +void TLS_stream::handle_read_congestion() +{ + //no checking here..? 
+ send_decrypted(); //decrypt any incomplete + signal_data(); //send any pending +} + +void TLS_stream::handle_write_congestion() +{ + //this should resolve the potential malloc congestion + tls_perform_stream_write(); +} +void TLS_stream::handle_data() { - buffer_t buf; + static buffer_t buf; while ((not read_congested() && (buf=m_transport->read_next()) != nullptr)) { TLS_PRINT("::data() Received %lu bytes\n",buf->size()); @@ -200,11 +222,13 @@ void TLS_stream::tls_read(buffer_t buffer) int TLS_stream::tls_perform_stream_write() { + static buffer_t buffer=nullptr; ERR_clear_error(); int pending = BIO_ctrl_pending(this->m_bio_wr); if (pending > 0) { - auto buffer= net::StreamBuffer::construct_write_buffer(pending); + TLS_PRINT("::tls_perform_stream_write() pending=%d bytes\n",pending); + buffer = net::StreamBuffer::construct_write_buffer(pending,last_buffer->get_allocator()); if (buffer == nullptr) { printf("Failed to construct buffer\n"); return 0; From 2c112ee9d344e2a92699e3918ecaebba2ee1464e Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Mon, 14 Jan 2019 14:21:03 +0100 Subject: [PATCH 72/93] 0.13.x: Cleaned up code --- api/net/openssl/tls_stream.hpp | 14 ------------- src/net/openssl/tls_stream.cpp | 37 +++++++++++++++------------------- 2 files changed, 16 insertions(+), 35 deletions(-) diff --git a/api/net/openssl/tls_stream.hpp b/api/net/openssl/tls_stream.hpp index f4f3613785..ea60df6d46 100644 --- a/api/net/openssl/tls_stream.hpp +++ b/api/net/openssl/tls_stream.hpp @@ -68,26 +68,12 @@ namespace openssl void handle_data(); int decrypt(const void *data,int size); int send_decrypted(); - void tls_read(buffer_t); int tls_perform_stream_write(); int tls_perform_handshake(); bool handshake_completed() const noexcept; void close_callback_once(); - //using Alloc = os::mem::buffer::allocator_type; - //deref a nullptr seems scary to me - /*Alloc &getAllocator() { return *allocator; } - //store ptr to allocator - void setAllocator(Alloc &alloc) - { - if 
(*allocator != alloc) - { - allocator=&alloc; - } - }*/ - //Alloc *allocator=nullptr; - buffer_t last_buffer; enum status_t { STATUS_OK, STATUS_WANT_IO, diff --git a/src/net/openssl/tls_stream.cpp b/src/net/openssl/tls_stream.cpp index 6391da6da1..277d572d0c 100644 --- a/src/net/openssl/tls_stream.cpp +++ b/src/net/openssl/tls_stream.cpp @@ -29,7 +29,6 @@ TLS_stream::TLS_stream(SSL_CTX* ctx, Stream_ptr t, bool outgoing) { if (this->tls_perform_handshake() < 0) return; } - last_buffer=std::make_shared>(); } TLS_stream::TLS_stream(Stream_ptr t, SSL* ssl, BIO* rd, BIO* wr) : m_transport(std::move(t)), m_ssl(ssl), m_bio_rd(rd), m_bio_wr(wr) @@ -37,7 +36,6 @@ TLS_stream::TLS_stream(Stream_ptr t, SSL* ssl, BIO* rd, BIO* wr) // always-on callbacks m_transport->on_data({this, &TLS_stream::handle_data}); m_transport->on_close({this, &TLS_stream::close_callback_once}); - last_buffer=std::make_shared>(); } TLS_stream::~TLS_stream() { @@ -47,12 +45,7 @@ TLS_stream::~TLS_stream() void TLS_stream::write(buffer_t buffer) { - //last_buffer=buffer; - //allocator=&buffer->get_allocator(); - /*if (UNLIKELY(allocator == nullptr)) - { - }*/ if (UNLIKELY(this->is_connected() == false)) { TLS_PRINT("::write() called on closed stream\n"); return; @@ -73,14 +66,14 @@ void TLS_stream::write(buffer_t buffer) void TLS_stream::write(const std::string& str) { //TODO handle failed alloc - write(net::StreamBuffer::construct_write_buffer(str.data(),str.data()+str.size(),last_buffer->get_allocator())); + write(net::StreamBuffer::construct_write_buffer(str.data(),str.data()+str.size())); } void TLS_stream::write(const void* data, const size_t len) { //TODO handle failed alloc auto* buf = static_cast (data); - write(net::StreamBuffer::construct_write_buffer(buf, buf + len,last_buffer->get_allocator())); + write(net::StreamBuffer::construct_write_buffer(buf, buf + len)); } int TLS_stream::decrypt(const void *indata, int size) @@ -129,14 +122,13 @@ int TLS_stream::send_decrypted() buffer_t buffer; // 
read decrypted data do { - //TODO "increase the size ?") - auto buffer=StreamBuffer::construct_read_buffer(8192,last_buffer->get_allocator()); + //TODO "increase the size or constructor based ??") + auto buffer=StreamBuffer::construct_read_buffer(8192); if (!buffer) return 0; n = SSL_read(this->m_ssl,buffer->data(),buffer->size()); if (n > 0) { buffer->resize(n); enqueue_data(buffer); - // m_receive_buffers.push_back(buffer); } } while (n > 0); return n; @@ -144,7 +136,7 @@ int TLS_stream::send_decrypted() void TLS_stream::handle_read_congestion() { - //no checking here..? + //Ordering could be different send_decrypted(); //decrypt any incomplete signal_data(); //send any pending } @@ -152,20 +144,25 @@ void TLS_stream::handle_read_congestion() void TLS_stream::handle_write_congestion() { //this should resolve the potential malloc congestion - tls_perform_stream_write(); + //might be missing some TLS signalling but without malloc we cant do that either + while(tls_perform_stream_write() > 0); } void TLS_stream::handle_data() { - static buffer_t buf; - while ((not read_congested() && (buf=m_transport->read_next()) != nullptr)) + while (m_transport->next_size() > 0) { - TLS_PRINT("::data() Received %lu bytes\n",buf->size()); - tls_read(buf); + if (UNLIKELY(read_congested())){ + break; + } + tls_read(m_transport->read_next()); } } void TLS_stream::tls_read(buffer_t buffer) { + if (buffer == nullptr ) { + return; + } ERR_clear_error(); uint8_t* buf_ptr = buffer->data(); int len = buffer->size(); @@ -222,15 +219,13 @@ void TLS_stream::tls_read(buffer_t buffer) int TLS_stream::tls_perform_stream_write() { - static buffer_t buffer=nullptr; ERR_clear_error(); int pending = BIO_ctrl_pending(this->m_bio_wr); if (pending > 0) { TLS_PRINT("::tls_perform_stream_write() pending=%d bytes\n",pending); - buffer = net::StreamBuffer::construct_write_buffer(pending,last_buffer->get_allocator()); + auto buffer = net::StreamBuffer::construct_write_buffer(pending); if (buffer == nullptr) 
{ - printf("Failed to construct buffer\n"); return 0; } int n = BIO_read(this->m_bio_wr, buffer->data(), buffer->size()); From 5d660155f64fa6c98c3e3bfce51c77f9da182ad3 Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Mon, 14 Jan 2019 14:21:37 +0100 Subject: [PATCH 73/93] test: improved server.js so that it can generate file size on request --- test/net/integration/microLB/server.js | 55 ++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/test/net/integration/microLB/server.js b/test/net/integration/microLB/server.js index b6ea1bd9fb..bd9e5e589f 100644 --- a/test/net/integration/microLB/server.js +++ b/test/net/integration/microLB/server.js @@ -1,10 +1,14 @@ var http = require('http'); +var url = require('url') -var dataString = function() { - var len = 1024*1024 * 50; +var dataString = function(len) { return '#'.repeat(len); } +function randomData(len) { + return Array.from({length:len}, () => Math.floor(Math.random() * 40)); +} + var stringToColour = function(str) { var hash = 0; for (var i = 0; i < str.length; i++) { @@ -18,13 +22,56 @@ var stringToColour = function(str) { return colour; } -//We need a function which handles requests and send response -function handleRequest(request, response){ +function handleDigest(path, request, response) { response.setTimeout(500); var addr = request.connection.localPort; response.end(addr.toString() + dataString()); } +function handleFile(path,request, response) { + response.setTimeout(500); + var addr = request.connection.localPort; + var size = parseInt(path.replace("/",""),10); + + if (size == 0) {  + size=1024*64; + } + response.end(addr.toString() + dataString(size)); +} + +function defaultHandler(path,request,response) { + response.setTimeout(500); + var addr = request.connection.localPort; + response.end(addr.toString() + dataString(1024*1024*50)); +} + +var routes = new Map([ + ['/digest' , handleDigest], + ['/file' , handleFile] + ]); + +function findHandler(path) +{ + 
for (const [key,value] of routes.entries()) { + if (path.startsWith(key)) + { + return { pattern: key, func: value}; + } + } + return { pattern :'',func : defaultHandler}; +} + +function handleRequest(request, response){ + var parts = url.parse(request.url); + + var route = findHandler(parts.pathname); + if (route.func) + { + var path = parts.pathname.replace(route.pattern,''); + route.func(path,request,response); + } +} + http.createServer(handleRequest).listen(6001, '10.0.0.1'); http.createServer(handleRequest).listen(6002, '10.0.0.1'); http.createServer(handleRequest).listen(6003, '10.0.0.1'); From d0318e7a08a2bbe9231e9fa76cb27c8818f0a52f Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Mon, 14 Jan 2019 18:31:27 +0100 Subject: [PATCH 74/93] microlb: deferred destruction of stream elements --- lib/microLB/micro_lb/balancer.cpp | 29 ++++++++++++++++++++++++----- lib/microLB/micro_lb/balancer.hpp | 4 ++++ 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index b9e8cefa79..25fa3794d3 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -92,7 +92,7 @@ namespace microLB } void Balancer::handle_connections() { - LBOUT("Handle_connections. %i waiting \n", queue.size()); + LBOUT("Handle_connections. 
%lu waiting \n", queue.size()); // stop any rethrow timer since this is a de-facto retry if (this->throw_retry_timer != Timers::UNUSED_ID) { Timers::stop(this->throw_retry_timer); @@ -251,16 +251,35 @@ namespace microLB assert(session.is_alive()); return session; } + + void Nodes::destroy_sessions() + { + for (auto& idx: closed_sessions) + { + auto &session=get_session(idx); + + // free session destroying potential unique ptr objects + session.incoming =nullptr; + session.outgoing=nullptr; + free_sessions.push_back(session.self); + LBOUT("Session %d destroyed (total = %d)\n", session.self, session_cnt); + } + closed_sessions.clear(); + } void Nodes::close_session(int idx) { auto& session = get_session(idx); // remove connections session.incoming->reset_callbacks(); - session.incoming = nullptr; session.outgoing->reset_callbacks(); - session.outgoing = nullptr; - // free session - free_sessions.push_back(session.self); + closed_sessions.push_back(session.self); + + if (!cleanup_timer.is_running()) + { + cleanup_timer.start(std::chrono::milliseconds(10),[this](){ + this->destroy_sessions(); + }); + } session_cnt--; LBOUT("Session %d closed (total = %d)\n", session.self, session_cnt); } diff --git a/lib/microLB/micro_lb/balancer.hpp b/lib/microLB/micro_lb/balancer.hpp index 924a5412cf..f05ffd8027 100644 --- a/lib/microLB/micro_lb/balancer.hpp +++ b/lib/microLB/micro_lb/balancer.hpp @@ -1,6 +1,7 @@ #pragma once #include #include +#include namespace microLB { @@ -78,6 +79,7 @@ namespace microLB net::Stream_ptr assign(net::Stream_ptr); Session& create_session(net::Stream_ptr inc, net::Stream_ptr out); void close_session(int); + void destroy_sessions(); Session& get_session(int); void serialize(liu::Storage&); @@ -90,8 +92,10 @@ namespace microLB int conn_iterator = 0; int algo_iterator = 0; const bool do_active_check; + Timer cleanup_timer; std::deque sessions; std::deque free_sessions; + std::deque closed_sessions; }; struct Balancer { From 
28347fe632ee7267cceffd7590afc0b677145439 Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Mon, 14 Jan 2019 18:33:44 +0100 Subject: [PATCH 75/93] tls_streams: further hardening of m_busy. --- src/net/openssl/tls_stream.cpp | 49 +++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/src/net/openssl/tls_stream.cpp b/src/net/openssl/tls_stream.cpp index 277d572d0c..81313b5427 100644 --- a/src/net/openssl/tls_stream.cpp +++ b/src/net/openssl/tls_stream.cpp @@ -111,7 +111,15 @@ int TLS_stream::decrypt(const void *indata, int size) // nothing more to do if still not finished if (handshake_completed() == false) return 0; // handshake success + this->m_busy=true; connected(); + this->m_busy=false; + + if (this->m_deferred_close) { + TLS_PRINT("::read() close on m_deferred_close after tls_perform_stream_write\n"); + this->close(); + return 0; + } } return n; } @@ -119,7 +127,6 @@ int TLS_stream::decrypt(const void *indata, int size) int TLS_stream::send_decrypted() { int n; - buffer_t buffer; // read decrypted data do { //TODO "increase the size or constructor based ??") @@ -138,7 +145,15 @@ void TLS_stream::handle_read_congestion() { //Ordering could be different send_decrypted(); //decrypt any incomplete + this->m_busy=true; signal_data(); //send any pending + this->m_busy=false; + + if (this->m_deferred_close) { + TLS_PRINT("::read() close on m_deferred_close after tls_perform_stream_write\n"); + this->close(); + return; + } } void TLS_stream::handle_write_congestion() @@ -169,14 +184,20 @@ void TLS_stream::tls_read(buffer_t buffer) while (len > 0) { + if (this->m_deferred_close) { + TLS_PRINT("::read() close on m_deferred_close"); + this->close(); + return; + } + int decrypted_bytes=decrypt(buf_ptr,len); if (UNLIKELY(decrypted_bytes==0)) return; buf_ptr += decrypted_bytes; len -= decrypted_bytes; + //enqueues decrypted data int ret=send_decrypted(); - // this goes here? 
if (UNLIKELY(this->is_closing() || this->is_closed())) { TLS_PRINT("TLS_stream::SSL_read closed during read\n"); @@ -204,17 +225,19 @@ void TLS_stream::tls_read(buffer_t buffer) this->close(); return; } - // check deferred closing - if (this->m_deferred_close) { - TLS_PRINT("::read() close on m_deferred_close after tls_perform_stream_write\n"); - this->close(); return; - } + } // while it < end //forward data this->m_busy=true; signal_data(); this->m_busy=false; + + // check deferred closing + if (this->m_deferred_close) { + TLS_PRINT("::read() close on m_deferred_close after tls_perform_stream_write\n"); + this->close(); return; + } } // tls_read() int TLS_stream::tls_perform_stream_write() @@ -238,7 +261,13 @@ int TLS_stream::tls_perform_stream_write() this->m_busy = true; stream_on_write(n); this->m_busy = false; + + if (this->m_deferred_close) { + TLS_PRINT("::read() close on m_deferred_close after tls_perform_stream_write\n"); + this->close(); return 0; + } } + if (UNLIKELY((pending = BIO_ctrl_pending(this->m_bio_wr)) > 0)) { return pending; @@ -247,6 +276,7 @@ int TLS_stream::tls_perform_stream_write() } BIO_read(this->m_bio_wr, nullptr, 0); + if (!BIO_should_retry(this->m_bio_wr)) { TLS_PRINT("::tls_perform_stream_write() close on !BIO_should_retry\n"); @@ -285,18 +315,23 @@ void TLS_stream::close() TLS_PRINT("TLS_stream::close()\n"); //ERR_clear_error(); if (this->m_busy) { + TLS_PRINT("TLS_stream::close() deferred\n"); this->m_deferred_close = true; return; } CloseCallback func = getCloseCallback(); this->reset_callbacks(); if (m_transport->is_connected()) + { m_transport->close(); + m_transport->reset_callbacks(); // ??? 
+ } if (func) func(); } void TLS_stream::close_callback_once() { TLS_PRINT("TLS_stream::close_callback_once() \n"); if (this->m_busy) { + TLS_PRINT("TLS_stream::close_callback_once() deferred\n"); this->m_deferred_close = true; return; } CloseCallback func = getCloseCallback(); From 2eefdb1b1ab090fe0528d1651e1eac000602b007 Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Mon, 14 Jan 2019 18:33:44 +0100 Subject: [PATCH 76/93] tls_streams: further hardening of m_busy. --- api/net/tcp/connection.hpp | 2 +- src/net/openssl/tls_stream.cpp | 49 +++++++++++++++++++++++++++++----- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/api/net/tcp/connection.hpp b/api/net/tcp/connection.hpp index f49dfd4062..a66bc33d90 100644 --- a/api/net/tcp/connection.hpp +++ b/api/net/tcp/connection.hpp @@ -322,7 +322,7 @@ class Connection { * @return True if able to send, False otherwise. */ bool can_send() const noexcept - { return usable_window() and writeq.has_remaining_requests(); } + { return (usable_window() >= SMSS()) and writeq.has_remaining_requests(); } /** * @brief Return the "tuple" (id) of the connection. 
diff --git a/src/net/openssl/tls_stream.cpp b/src/net/openssl/tls_stream.cpp index 277d572d0c..81313b5427 100644 --- a/src/net/openssl/tls_stream.cpp +++ b/src/net/openssl/tls_stream.cpp @@ -111,7 +111,15 @@ int TLS_stream::decrypt(const void *indata, int size) // nothing more to do if still not finished if (handshake_completed() == false) return 0; // handshake success + this->m_busy=true; connected(); + this->m_busy=false; + + if (this->m_deferred_close) { + TLS_PRINT("::read() close on m_deferred_close after tls_perform_stream_write\n"); + this->close(); + return 0; + } } return n; } @@ -119,7 +127,6 @@ int TLS_stream::decrypt(const void *indata, int size) int TLS_stream::send_decrypted() { int n; - buffer_t buffer; // read decrypted data do { //TODO "increase the size or constructor based ??") @@ -138,7 +145,15 @@ void TLS_stream::handle_read_congestion() { //Ordering could be different send_decrypted(); //decrypt any incomplete + this->m_busy=true; signal_data(); //send any pending + this->m_busy=false; + + if (this->m_deferred_close) { + TLS_PRINT("::read() close on m_deferred_close after tls_perform_stream_write\n"); + this->close(); + return; + } } void TLS_stream::handle_write_congestion() @@ -169,14 +184,20 @@ void TLS_stream::tls_read(buffer_t buffer) while (len > 0) { + if (this->m_deferred_close) { + TLS_PRINT("::read() close on m_deferred_close"); + this->close(); + return; + } + int decrypted_bytes=decrypt(buf_ptr,len); if (UNLIKELY(decrypted_bytes==0)) return; buf_ptr += decrypted_bytes; len -= decrypted_bytes; + //enqueues decrypted data int ret=send_decrypted(); - // this goes here? 
if (UNLIKELY(this->is_closing() || this->is_closed())) { TLS_PRINT("TLS_stream::SSL_read closed during read\n"); @@ -204,17 +225,19 @@ void TLS_stream::tls_read(buffer_t buffer) this->close(); return; } - // check deferred closing - if (this->m_deferred_close) { - TLS_PRINT("::read() close on m_deferred_close after tls_perform_stream_write\n"); - this->close(); return; - } + } // while it < end //forward data this->m_busy=true; signal_data(); this->m_busy=false; + + // check deferred closing + if (this->m_deferred_close) { + TLS_PRINT("::read() close on m_deferred_close after tls_perform_stream_write\n"); + this->close(); return; + } } // tls_read() int TLS_stream::tls_perform_stream_write() @@ -238,7 +261,13 @@ int TLS_stream::tls_perform_stream_write() this->m_busy = true; stream_on_write(n); this->m_busy = false; + + if (this->m_deferred_close) { + TLS_PRINT("::read() close on m_deferred_close after tls_perform_stream_write\n"); + this->close(); return 0; + } } + if (UNLIKELY((pending = BIO_ctrl_pending(this->m_bio_wr)) > 0)) { return pending; @@ -247,6 +276,7 @@ int TLS_stream::tls_perform_stream_write() } BIO_read(this->m_bio_wr, nullptr, 0); + if (!BIO_should_retry(this->m_bio_wr)) { TLS_PRINT("::tls_perform_stream_write() close on !BIO_should_retry\n"); @@ -285,18 +315,23 @@ void TLS_stream::close() TLS_PRINT("TLS_stream::close()\n"); //ERR_clear_error(); if (this->m_busy) { + TLS_PRINT("TLS_stream::close() deferred\n"); this->m_deferred_close = true; return; } CloseCallback func = getCloseCallback(); this->reset_callbacks(); if (m_transport->is_connected()) + { m_transport->close(); + m_transport->reset_callbacks(); // ??? 
+ } if (func) func(); } void TLS_stream::close_callback_once() { TLS_PRINT("TLS_stream::close_callback_once() \n"); if (this->m_busy) { + TLS_PRINT("TLS_stream::close_callback_once() deferred\n"); this->m_deferred_close = true; return; } CloseCallback func = getCloseCallback(); From b6b381da16ed3ecca24c9a7f2f3744cc0e60344e Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Tue, 15 Jan 2019 16:25:31 +0100 Subject: [PATCH 77/93] 0.13.x: if reset callbacks is called in loop exit --- api/net/stream_buffer.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/api/net/stream_buffer.hpp b/api/net/stream_buffer.hpp index 5f8a8e202f..8aaf2a428f 100644 --- a/api/net/stream_buffer.hpp +++ b/api/net/stream_buffer.hpp @@ -205,6 +205,7 @@ namespace net { // Pop each time, in case callback leads to another call here. m_send_buffers.pop_front(); m_on_read(buf); + if (m_on_read == nullptr) { break; } //if calling m_on_read reset the callbacks exit } } } From 21c3c28d8146b984595cbaf3806ea249800d44a8 Mon Sep 17 00:00:00 2001 From: Kristian Jerpetjoen Date: Tue, 15 Jan 2019 16:25:31 +0100 Subject: [PATCH 78/93] 0.13.x: if reset callbacks is called in loop exit --- api/net/stream_buffer.hpp | 1 + src/net/openssl/tls_stream.cpp | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/api/net/stream_buffer.hpp b/api/net/stream_buffer.hpp index 5f8a8e202f..8aaf2a428f 100644 --- a/api/net/stream_buffer.hpp +++ b/api/net/stream_buffer.hpp @@ -205,6 +205,7 @@ namespace net { // Pop each time, in case callback leads to another call here. 
m_send_buffers.pop_front(); m_on_read(buf); + if (m_on_read == nullptr) { break; } //if calling m_on_read reset the callbacks exit } } } diff --git a/src/net/openssl/tls_stream.cpp b/src/net/openssl/tls_stream.cpp index 81313b5427..16722a15f2 100644 --- a/src/net/openssl/tls_stream.cpp +++ b/src/net/openssl/tls_stream.cpp @@ -164,12 +164,18 @@ void TLS_stream::handle_write_congestion() } void TLS_stream::handle_data() { - while (m_transport->next_size() > 0) + while ( m_transport->next_size() > 0) { if (UNLIKELY(read_congested())){ break; } tls_read(m_transport->read_next()); + //bail + if (m_transport == nullptr) + { + printf("m_transport \n"); + break; + } } } From 6ae0fc9aa60e10836613d4125ac8ce255b32e4c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Fri, 18 Jan 2019 14:10:14 +0100 Subject: [PATCH 79/93] pmr: Don't promise more capacity than whats actually remaining in pool --- api/util/detail/alloc_pmr.hpp | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/api/util/detail/alloc_pmr.hpp b/api/util/detail/alloc_pmr.hpp index 68fbb2227d..58c4eaef9c 100644 --- a/api/util/detail/alloc_pmr.hpp +++ b/api/util/detail/alloc_pmr.hpp @@ -33,6 +33,7 @@ namespace os::mem::detail { void* do_allocate(size_t size, size_t align) override { if (UNLIKELY(size + allocated_ > cap_total_)) { + //printf("pmr about to throw bad alloc: sz=%zu alloc=%zu cap=%zu\n", size, allocated_, cap_total_); throw std::bad_alloc(); } @@ -46,6 +47,7 @@ namespace os::mem::detail { void* buf = memalign(align, size); if (buf == nullptr) { + //printf("pmr memalign return nullptr, throw bad alloc\n"); throw std::bad_alloc(); } @@ -152,7 +154,9 @@ namespace os::mem::detail { std::size_t resource_capacity() { if (cap_suballoc_ == 0) + { return cap_total_ / (used_resources_ + os::mem::Pmr_pool::resource_division_offset); + } return cap_suballoc_; } @@ -244,7 +248,9 @@ namespace os::mem { // Pmr_resource implementation // 
Pmr_resource::Pmr_resource(Pool_ptr p) : pool_{p} {} - std::size_t Pmr_resource::capacity() { return pool_->resource_capacity(); } + std::size_t Pmr_resource::capacity() { + return std::min(pool_->resource_capacity(), pool_->allocatable()); + } std::size_t Pmr_resource::allocatable() { auto cap = capacity(); if (used > cap) @@ -266,12 +272,19 @@ namespace os::mem { throw std::bad_alloc(); } - void* buf = pool_->allocate(size, align); - - used += size; - allocs++; - - return buf; + try + { + void* buf = pool_->allocate(size, align); + used += size; + allocs++; + return buf; + } + catch(const std::bad_alloc&) + { + //printf("Pool returned bad alloc, resource: used=%zu reported_cap=%zu allocatable=%zu\n", + // used, cap, allocatable()); + throw; + } } void Pmr_resource::do_deallocate(void* ptr, std::size_t s, std::size_t a) { From 08a8a28e6d452139c67f2a70a35ef7b0ca01a025 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Fri, 18 Jan 2019 14:12:31 +0100 Subject: [PATCH 80/93] tcp: Throw exception when trying to connect if not enough resource mem --- src/net/tcp/connection.cpp | 3 ++- src/net/tcp/tcp.cpp | 28 +++++++++++++++++++++++----- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/net/tcp/connection.cpp b/src/net/tcp/connection.cpp index f467763613..38735407b4 100644 --- a/src/net/tcp/connection.cpp +++ b/src/net/tcp/connection.cpp @@ -53,8 +53,9 @@ Connection::Connection(TCP& host, Socket local, Socket remote, ConnectCallback c Connection::~Connection() { - //printf(" Deleted %p %s ACTIVE: %u\n", this, + //printf(" Deleted %p %s ACTIVE: %zu\n", this, // to_string().c_str(), host_.active_connections()); + rtx_clear(); } diff --git a/src/net/tcp/tcp.cpp b/src/net/tcp/tcp.cpp index f8f518a66c..a9392761a9 100644 --- a/src/net/tcp/tcp.cpp +++ b/src/net/tcp/tcp.cpp @@ -492,21 +492,27 @@ bool TCP::unbind(const Socket& socket) return false; } -bool TCP::add_connection(tcp::Connection_ptr conn) { +bool 
TCP::add_connection(tcp::Connection_ptr conn) +{ + const size_t alloc_thres = max_bufsize() * Read_request::buffer_limit; // Stat increment number of incoming connections (*incoming_connections_)++; debug(" Connection added %s \n", conn->to_string().c_str()); - conn->bufalloc = mempool_.get_resource(); + auto resource = mempool_.get_resource(); // Reject connection if we can't allocate memory - if (conn->bufalloc == nullptr - or conn->bufalloc->allocatable() < max_bufsize() * Read_request::buffer_limit){ + if(UNLIKELY(resource == nullptr or resource->allocatable() < alloc_thres)) + { conn->_on_cleanup_ = nullptr; conn->abort(); return false; } + conn->bufalloc = std::move(resource); + + //printf("New inc conn %s allocatable=%zu\n", conn->to_string().c_str(), conn->bufalloc->allocatable()); + Expects(conn->bufalloc != nullptr); conn->_on_cleanup({this, &TCP::close_connection}); return connections_.emplace(conn->tuple(), conn).second; @@ -514,6 +520,15 @@ bool TCP::add_connection(tcp::Connection_ptr conn) { Connection_ptr TCP::create_connection(Socket local, Socket remote, ConnectCallback cb) { + const size_t alloc_thres = max_bufsize() * Read_request::buffer_limit; + + auto resource = mempool_.get_resource(); + // Don't create connection if we can't allocate memory + if(UNLIKELY(resource == nullptr or resource->allocatable() < alloc_thres)) + { + throw TCP_error{"Unable to create new connection: Not enough allocatable memory"}; + } + // Stat increment number of outgoing connections (*outgoing_connections_)++; @@ -523,7 +538,10 @@ Connection_ptr TCP::create_connection(Socket local, Socket remote, ConnectCallba ) ).first->second; conn->_on_cleanup({this, &TCP::close_connection}); - conn->bufalloc = mempool_.get_resource(); + conn->bufalloc = std::move(resource); + + //printf("New out conn %s allocatable=%zu\n", conn->to_string().c_str(), conn->bufalloc->allocatable()); + Expects(conn->bufalloc != nullptr); return conn; } From 1b7b646ac1241634dfca9ccf6389e80ee80eae4f 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Fri, 18 Jan 2019 14:13:14 +0100 Subject: [PATCH 81/93] microlb: Handle exception when connect and terminate hack for unused backend connections --- lib/microLB/micro_lb/balancer.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index 25fa3794d3..ffd0ba1b13 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -259,8 +259,12 @@ namespace microLB auto &session=get_session(idx); // free session destroying potential unique ptr objects - session.incoming =nullptr; - session.outgoing=nullptr; + session.incoming = nullptr; + auto out_tcp = dynamic_cast(session.outgoing->bottom_transport())->tcp(); + session.outgoing = nullptr; + // if we don't have anything to write to the backend, abort it. + if(not out_tcp->sendq_size()) + out_tcp->abort(); free_sessions.push_back(session.self); LBOUT("Session %d destroyed (total = %d)\n", session.self, session_cnt); } @@ -359,7 +363,17 @@ namespace microLB } void Node::connect() { - auto outgoing = this->stack.tcp().connect(this->addr); + net::tcp::Connection_ptr outgoing; + try + { + outgoing = this->stack.tcp().connect(this->addr); + } + catch(const net::TCP_error& err) + { + LBOUT("Got exception: %s\n", err.what()); + this->restart_active_check(); + return; + } // connecting to node atm. 
this->connecting++; // retry timer when connect takes too long From 27fd8afcd683c8b7d16d2bb62f6bc004dc867c32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Fri, 18 Jan 2019 14:23:41 +0100 Subject: [PATCH 82/93] misc: removed some dead code --- api/util/detail/alloc_pmr.hpp | 17 ++++------------- lib/microLB/micro_lb/balancer.cpp | 2 +- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/api/util/detail/alloc_pmr.hpp b/api/util/detail/alloc_pmr.hpp index 58c4eaef9c..73f67fb5cf 100644 --- a/api/util/detail/alloc_pmr.hpp +++ b/api/util/detail/alloc_pmr.hpp @@ -272,19 +272,10 @@ namespace os::mem { throw std::bad_alloc(); } - try - { - void* buf = pool_->allocate(size, align); - used += size; - allocs++; - return buf; - } - catch(const std::bad_alloc&) - { - //printf("Pool returned bad alloc, resource: used=%zu reported_cap=%zu allocatable=%zu\n", - // used, cap, allocatable()); - throw; - } + void* buf = pool_->allocate(size, align); + used += size; + allocs++; + return buf; } void Pmr_resource::do_deallocate(void* ptr, std::size_t s, std::size_t a) { diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index ffd0ba1b13..9b0f124da6 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -368,7 +368,7 @@ namespace microLB { outgoing = this->stack.tcp().connect(this->addr); } - catch(const net::TCP_error& err) + catch([[maybe_unused]]const net::TCP_error& err) { LBOUT("Got exception: %s\n", err.what()); this->restart_active_check(); From d4f295f1d801713b4c968656df2989f96c307512 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Mon, 21 Jan 2019 10:08:30 +0100 Subject: [PATCH 83/93] net: Remove namespace from call to ntohs for building unittest on mac --- src/net/checksum.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/net/checksum.cpp b/src/net/checksum.cpp index 065152fea5..db7e1035f6 100644 --- a/src/net/checksum.cpp +++ 
b/src/net/checksum.cpp @@ -133,7 +133,7 @@ uint16_t checksum(uint32_t tsum, const void* data, size_t length) noexcept vsum=(vsum & 0xFFFF)+(vsum>>16); } //allways right in this case as its allways little endian x86 - return ~net::ntohs((uint16_t)(vsum)); + return ~ntohs((uint16_t)(vsum)); #elif defined(__AVX2__) // VEX-align buffer while (((uintptr_t) buffer & 15) && length >= 4) { From 73e5f23a5eeb491891d4c7ecf0f4922ffecf95eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Mon, 21 Jan 2019 10:08:50 +0100 Subject: [PATCH 84/93] test: Update tcp read request test to reflect changes to API --- test/net/unit/tcp_read_request_test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/net/unit/tcp_read_request_test.cpp b/test/net/unit/tcp_read_request_test.cpp index 4ff0e4f2a6..a157c024ad 100644 --- a/test/net/unit/tcp_read_request_test.cpp +++ b/test/net/unit/tcp_read_request_test.cpp @@ -34,7 +34,8 @@ CASE("Operating with out of order data") no_reads++; }; - auto req = std::make_unique(seq, BUFSZ, BUFSZ, read_cb); + auto req = std::make_unique(seq, BUFSZ, BUFSZ); + req->on_read_callback = read_cb; no_reads = 0; // Insert hole, first missing From 0081283991b95d9665ee194ccd54f16835411df2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Mon, 21 Jan 2019 14:31:28 +0100 Subject: [PATCH 85/93] net: removed unicode char in stream buffer code --- api/net/stream_buffer.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api/net/stream_buffer.hpp b/api/net/stream_buffer.hpp index 8aaf2a428f..42347f485f 100644 --- a/api/net/stream_buffer.hpp +++ b/api/net/stream_buffer.hpp @@ -205,7 +205,9 @@ namespace net { // Pop each time, in case callback leads to another call here. 
m_send_buffers.pop_front(); m_on_read(buf); - if (m_on_read == nullptr) { break; } //if calling m_on_read reset the callbacks exit + if (m_on_read == nullptr) { + break; + } //if calling m_on_read reset the callbacks exit } } } From 82b0219521e1ed250781d36a02294dc473e084a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Mon, 21 Jan 2019 16:07:58 +0100 Subject: [PATCH 86/93] pmr: return subcap if set --- api/util/detail/alloc_pmr.hpp | 5 +++-- test/util/unit/pmr_alloc_test.cpp | 27 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/api/util/detail/alloc_pmr.hpp b/api/util/detail/alloc_pmr.hpp index 73f67fb5cf..6a7e317622 100644 --- a/api/util/detail/alloc_pmr.hpp +++ b/api/util/detail/alloc_pmr.hpp @@ -155,7 +155,8 @@ namespace os::mem::detail { std::size_t resource_capacity() { if (cap_suballoc_ == 0) { - return cap_total_ / (used_resources_ + os::mem::Pmr_pool::resource_division_offset); + auto div = cap_total_ / (used_resources_ + os::mem::Pmr_pool::resource_division_offset); + return std::min(div, allocatable()); } return cap_suballoc_; } @@ -249,7 +250,7 @@ namespace os::mem { // Pmr_resource::Pmr_resource(Pool_ptr p) : pool_{p} {} std::size_t Pmr_resource::capacity() { - return std::min(pool_->resource_capacity(), pool_->allocatable()); + return pool_->resource_capacity(); } std::size_t Pmr_resource::allocatable() { auto cap = capacity(); diff --git a/test/util/unit/pmr_alloc_test.cpp b/test/util/unit/pmr_alloc_test.cpp index 51cdc1b0ae..1f537bc0b3 100644 --- a/test/util/unit/pmr_alloc_test.cpp +++ b/test/util/unit/pmr_alloc_test.cpp @@ -166,10 +166,37 @@ CASE("pmr::resource usage") { // Drain all the resources for (auto& res : resources) { + auto exp_alloc = resource_cap; + EXPECT(not res->full()); + EXPECT(pool.allocatable() >= exp_alloc); + EXPECT(res->allocatable() == exp_alloc); + EXPECT(res->allocated() == 0); + auto* p1 = res->allocate(1_KiB); + exp_alloc -= 1_KiB; + EXPECT(res->allocated() 
== 1_KiB); + EXPECT(res->capacity() == resource_cap); + EXPECT(pool.allocatable() >= exp_alloc); + EXPECT(res->allocatable() == exp_alloc); + auto* p2 = res->allocate(1_KiB); + exp_alloc -= 1_KiB; + EXPECT(res->allocated() == 2_KiB); + EXPECT(pool.allocatable() >= exp_alloc); + EXPECT(res->allocatable() == exp_alloc); + auto* p3 = res->allocate(1_KiB); + exp_alloc -= 1_KiB; + EXPECT(res->allocated() == 3_KiB); + EXPECT(pool.allocatable() >= exp_alloc); + EXPECT(res->allocatable() == exp_alloc); + auto* p4 = res->allocate(1_KiB); + exp_alloc -= 1_KiB; + EXPECT(res->allocated() == 4_KiB); + EXPECT(pool.allocatable() >= exp_alloc); + EXPECT(res->allocatable() == exp_alloc); + EXPECT(p1 != nullptr); EXPECT(p2 != nullptr); EXPECT(p3 != nullptr); From 1db1523103c6557ae06b3249ce7e038dfa3051b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Tue, 22 Jan 2019 11:55:55 +0100 Subject: [PATCH 87/93] net: Flush data in stream buffer when handler is set --- api/net/stream_buffer.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/net/stream_buffer.hpp b/api/net/stream_buffer.hpp index 42347f485f..700b5694ab 100644 --- a/api/net/stream_buffer.hpp +++ b/api/net/stream_buffer.hpp @@ -22,9 +22,11 @@ namespace net { void on_read(size_t, ReadCallback cb) override { m_on_read = std::move(cb); + signal_data(); } void on_data(DataCallback cb) override { m_on_data = std::move(cb); + signal_data(); } size_t next_size() override; From afc904434640ad32cdbec0f75af3c76980aa381a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Tue, 22 Jan 2019 11:57:10 +0100 Subject: [PATCH 88/93] microlb: Replace lambda with member functions --- lib/microLB/micro_lb/balancer.cpp | 47 +++++++++++++++---------------- lib/microLB/micro_lb/balancer.hpp | 3 ++ 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index 9b0f124da6..9115313959 100644 --- 
a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -278,12 +278,8 @@ namespace microLB session.outgoing->reset_callbacks(); closed_sessions.push_back(session.self); - if (!cleanup_timer.is_running()) - { - cleanup_timer.start(std::chrono::milliseconds(10),[this](){ - this->destroy_sessions(); - }); - } + destroy_sessions(); + session_cnt--; LBOUT("Session %d closed (total = %d)\n", session.self, session_cnt); } @@ -322,7 +318,7 @@ namespace microLB this->restart_active_check(); } }); - } catch (std::exception& e) { + } catch (const std::exception&) { // do nothing, because might just be eph.ports used up } } @@ -440,34 +436,37 @@ namespace microLB : parent(n), self(idx), incoming(std::move(inc)), outgoing(std::move(out)) { - - incoming->on_data([this]() { - assert(this->is_alive()); - while((this->incoming->next_size() > 0) and this->outgoing->is_writable()) - { - this->outgoing->write(this->incoming->read_next()); - } - }); + incoming->on_data({this, &Session::flush_incoming}); incoming->on_close( [&nodes = n, idx] () { nodes.close_session(idx); }); - outgoing->on_data([this]() { - assert(this->is_alive()); - while((this->outgoing->next_size() > 0) and this->incoming->is_writable()) - { - this->incoming->write(this->outgoing->read_next()); - } - }); + outgoing->on_data({this, &Session::flush_outgoing}); outgoing->on_close( [&nodes = n, idx] () { nodes.close_session(idx); }); - - } bool Session::is_alive() const { return incoming != nullptr; } + + void Session::flush_incoming() + { + assert(this->is_alive()); + while((this->incoming->next_size() > 0) and this->outgoing->is_writable()) + { + this->outgoing->write(this->incoming->read_next()); + } + } + + void Session::flush_outgoing() + { + assert(this->is_alive()); + while((this->outgoing->next_size() > 0) and this->incoming->is_writable()) + { + this->incoming->write(this->outgoing->read_next()); + } + } } diff --git a/lib/microLB/micro_lb/balancer.hpp 
b/lib/microLB/micro_lb/balancer.hpp index f05ffd8027..473e76b475 100644 --- a/lib/microLB/micro_lb/balancer.hpp +++ b/lib/microLB/micro_lb/balancer.hpp @@ -28,6 +28,9 @@ namespace microLB const int self; net::Stream_ptr incoming; net::Stream_ptr outgoing; + + void flush_incoming(); + void flush_outgoing(); }; struct Node { From 9cfc81ec28e385988bfe37a8f9560c1d1e3349e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20=C3=85kesson?= Date: Tue, 22 Jan 2019 13:17:00 +0100 Subject: [PATCH 89/93] test: Increase memory for microLB test --- test/net/integration/microLB/vm.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/net/integration/microLB/vm.json b/test/net/integration/microLB/vm.json index 85f68a14cf..8c0a4549e9 100644 --- a/test/net/integration/microLB/vm.json +++ b/test/net/integration/microLB/vm.json @@ -6,5 +6,5 @@ {"device" : "virtio"}, {"device" : "virtio"} ], - "mem" : 64 + "mem" : 256 } From a18127611e7d7a8df842fe334ed2033621bd72b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alf-Andr=C3=A9=20Walla?= Date: Tue, 22 Jan 2019 04:37:17 -0800 Subject: [PATCH 90/93] microlb: Add try/catch to avoid silently invalidating the client queue --- lib/microLB/micro_lb/balancer.cpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index 770c5653ad..d335e032a7 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -74,16 +74,21 @@ namespace microLB auto& client = queue.front(); assert(client.conn != nullptr); if (client.conn->is_connected()) { - // NOTE: explicitly want to copy buffers - net::Stream_ptr rval = - nodes.assign(std::move(client.conn), client.readq); - if (rval == nullptr) { - // done with this queue item - queue.pop_front(); - } - else { - // put connection back in queue item - client.conn = std::move(rval); + try { + // NOTE: explicitly want to copy buffers + net::Stream_ptr rval = + 
nodes.assign(std::move(client.conn), client.readq); + if (rval == nullptr) { + // done with this queue item + queue.pop_front(); + } + else { + // put connection back in queue item + client.conn = std::move(rval); + } + } catch (...) { + queue.pop_front(); // we have no choice + throw; } } else { From dcfdbaa3355520ce9acd3ef40f423c74806da1b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alf-Andr=C3=A9=20Walla?= Date: Tue, 22 Jan 2019 05:17:27 -0800 Subject: [PATCH 91/93] microlb: Remove reference to readq --- lib/microLB/micro_lb/balancer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/microLB/micro_lb/balancer.cpp b/lib/microLB/micro_lb/balancer.cpp index a1a9adedec..f7c69c38e8 100644 --- a/lib/microLB/micro_lb/balancer.cpp +++ b/lib/microLB/micro_lb/balancer.cpp @@ -74,7 +74,7 @@ namespace microLB try { // NOTE: explicitly want to copy buffers net::Stream_ptr rval = - nodes.assign(std::move(client.conn), client.readq); + nodes.assign(std::move(client.conn)); if (rval == nullptr) { // done with this queue item queue.pop_front(); From 6a5f8a56bbe234f03eb5f6ede5d3372501e6c66b Mon Sep 17 00:00:00 2001 From: Martin Nordsletten Date: Tue, 22 Jan 2019 14:19:28 +0100 Subject: [PATCH 92/93] Test: Set a timeout for microlb get connection --- test/net/integration/microLB/test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/net/integration/microLB/test.py b/test/net/integration/microLB/test.py index 24777b3f30..3fe5734756 100755 --- a/test/net/integration/microLB/test.py +++ b/test/net/integration/microLB/test.py @@ -16,8 +16,7 @@ expected_string = "#" * 1024 * 1024 * 50 def validateRequest(addr): - response = requests.get('https://10.0.0.68:443', verify=False) - #print (response.content) + response = requests.get('https://10.0.0.68:443', verify=False, timeout=5) return (response.content) == str(addr) + expected_string # start nodeJS From 948f230d9accf654f0ad1a1d351da9072c06198b Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Andreas=20=C3=85kesson?= Date: Tue, 22 Jan 2019 16:19:42 +0100 Subject: [PATCH 93/93] posix: Fix TCP posix hanging when data and FIN is recv right after eachother resulting in os block not getting time Co-authored-by: Martin Nordsletten --- api/net/tcp/connection_states.hpp | 8 ++++++-- src/posix/tcp_fd.cpp | 6 ++++-- test/posix/integration/tcp/test.py | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/api/net/tcp/connection_states.hpp b/api/net/tcp/connection_states.hpp index 869ae69de5..ca95f9acb2 100644 --- a/api/net/tcp/connection_states.hpp +++ b/api/net/tcp/connection_states.hpp @@ -352,11 +352,15 @@ class Connection::LastAck : public State { */ virtual Result handle(Connection&, Packet_view& in) override; - inline virtual std::string to_string() const override { + std::string to_string() const override { return "LAST-ACK"; }; - inline virtual bool is_closing() const override { + bool is_closing() const override { + return true; + } + + bool is_closed() const override { return true; } diff --git a/src/posix/tcp_fd.cpp b/src/posix/tcp_fd.cpp index a2cf6115bb..ca0d3f2d6d 100644 --- a/src/posix/tcp_fd.cpp +++ b/src/posix/tcp_fd.cpp @@ -311,8 +311,10 @@ ssize_t TCP_FD_Conn::recv(void* dest, size_t len, int) bytes = buffer->size(); }); - // BLOCK HERE - while (!done || !conn->is_readable()) { + // BLOCK HERE: + // 1. if we havent read the data we asked for + // 2. or we aren't readable but not closed (not 100% sure here hehe..) + while (!done || (!conn->is_readable() and !conn->is_closed())) { OS::block(); } // restore diff --git a/test/posix/integration/tcp/test.py b/test/posix/integration/tcp/test.py index 7ce8aab968..0c5b27c547 100755 --- a/test/posix/integration/tcp/test.py +++ b/test/posix/integration/tcp/test.py @@ -50,6 +50,7 @@ def TCP_connect(): sock.connect((HOST, PORT)) MESSAGE = "POSIX is for hipsters" sock.send(MESSAGE) + sock.close() def TCP_recv(trigger_line): server.listen(1)