Skip to content

Commit 97f29fe

Browse files
authored
Merge pull request #102 from input-output-hk/ETCM-168-fix-stalled-enrollment
ETCM-168: Fix stalled enrollments
2 parents feef9b2 + e214757 commit 97f29fe

File tree

2 files changed

+55
-33
lines changed

2 files changed

+55
-33
lines changed

build.sc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ trait ScalanetModule extends ScalaModule {
6868
trait ScalanetPublishModule extends PublishModule {
6969
def description: String
7070

71-
override def publishVersion = "0.4-SNAPSHOT"
71+
override def publishVersion = "0.4.1-SNAPSHOT"
7272

7373
override def pomSettings = PomSettings(
7474
description = description,

scalanet/discovery/src/io/iohk/scalanet/discovery/ethereum/v4/DiscoveryService.scala

Lines changed: 54 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ object DiscoveryService {
407407
case false =>
408408
initBond(peer).flatMap {
409409
case Some(result) =>
410-
result.pongReceived.get
410+
result.pongReceived.get.timeoutTo(config.requestTimeout, Task.pure(false))
411411

412412
case None =>
413413
Task(logger.debug(s"Trying to bond with $peer...")) >>
@@ -433,6 +433,7 @@ object DiscoveryService {
433433
_ <- completePong(peer, responded = false)
434434
} yield false
435435
}
436+
.guarantee(stateRef.update(_.clearBondingResults(peer)))
436437
}
437438
}
438439

@@ -563,7 +564,7 @@ object DiscoveryService {
563564

564565
waitOrFetch.flatMap {
565566
case Left(wait) =>
566-
wait.get
567+
wait.get.timeoutTo(config.requestTimeout, Task.pure(None))
567568

568569
case Right(fetch) =>
569570
val maybeEnr = bond(peer).flatMap {
@@ -729,44 +730,55 @@ object DiscoveryService {
729730
case true =>
730731
rpc
731732
.findNode(peer)(target)
732-
.map(_.map(_.toList).getOrElse(Nil))
733+
.flatMap {
734+
case None =>
735+
Task(logger.debug(s"Received no response for neighbors for $target from ${peer.address}")).as(Nil)
736+
case Some(neighbors) =>
737+
Task(logger.debug(s"Received ${neighbors.size} neighbors for $target from ${peer.address}"))
738+
.as(neighbors.toList)
739+
}
733740
.flatMap { neighbors =>
734741
neighbors.filterA { neighbor =>
735742
if (neighbor.address.checkRelay(peer))
736743
Task.pure(true)
737744
else
738-
Task(logger.debug(s"Ignoring neighbor $neighbor from $peer because of invalid relay IP.")).as(false)
745+
Task(logger.debug(s"Ignoring neighbor $neighbor from ${peer.address} because of invalid relay IP."))
746+
.as(false)
739747
}
740748
}
741749
.recoverWith {
742750
case NonFatal(ex) =>
743-
Task(logger.debug(s"Failed to fetch neighbors of $target from $from: $ex")).as(Nil)
751+
Task(logger.debug(s"Failed to fetch neighbors of $target from ${peer.address}: $ex")).as(Nil)
744752
}
745753
case false =>
746-
Task(logger.debug(s"Could not bond with $from to fetch neighbors of $target")).as(Nil)
754+
Task(logger.debug(s"Could not bond with ${peer.address} to fetch neighbors of $target")).as(Nil)
747755
}
748756
}
749757

750758
// Make sure these new nodes can be bonded with before we consider them,
751759
// otherwise they might appear to be closer to the target but actually
752760
// be fakes with unreachable addresses that could knock out legit nodes.
753-
def bondNeighbors(neighbors: Seq[Node]): Task[Seq[Node]] = {
754-
Task
755-
.parTraverseUnordered(neighbors) { neighbor =>
756-
bond(toPeer(neighbor)).flatMap {
757-
case true =>
758-
Task.pure(Some(neighbor))
759-
case false =>
760-
Task(logger.debug(s"Could not bond with neighbor candidate $neighbor")).as(None)
761+
def bondNeighbors(neighbors: Seq[Node]): Task[Seq[Node]] =
762+
for {
763+
_ <- Task(logger.debug(s"Bonding with ${neighbors.size} neighbors..."))
764+
bonded <- Task
765+
.parTraverseN(config.kademliaAlpha)(neighbors) { neighbor =>
766+
bond(toPeer(neighbor)).flatMap {
767+
case true =>
768+
Task.pure(Some(neighbor))
769+
case false =>
770+
Task(logger.debug(s"Could not bond with neighbor candidate $neighbor")).as(None)
771+
}
761772
}
762-
}
763-
.map(_.flatten)
764-
}
773+
.map(_.flatten)
774+
_ <- Task(logger.debug(s"Bonded with ${bonded.size} neighbors out of ${neighbors.size}."))
775+
} yield bonded
765776

766777
def loop(
767778
local: Node,
768779
closest: SortedSet[Node],
769-
asked: Set[Node]
780+
asked: Set[Node],
781+
neighbors: Set[Node]
770782
): Task[SortedSet[Node]] = {
771783
// Contact the alpha closest nodes to the target that we haven't asked before.
772784
val contacts = closest
@@ -775,24 +787,32 @@ object DiscoveryService {
775787
.take(config.kademliaAlpha)
776788
.toList
777789

778-
if (contacts.isEmpty)
779-
Task.pure(closest)
780-
else {
781-
Task
782-
.parTraverseUnordered(contacts)(fetchNeighbors)
783-
.map(_.flatten.distinct)
784-
.flatMap(bondNeighbors)
785-
.flatMap { newNeighbors =>
786-
val newClosest = (closest ++ newNeighbors).take(config.kademliaBucketSize)
787-
val newAsked = asked ++ contacts
788-
loop(local, newClosest, newAsked)
789-
}
790+
if (contacts.isEmpty) {
791+
Task(
792+
logger.debug(s"Lookup for $target finished; asked ${asked.size} nodes, found ${neighbors.size} neighbors.")
793+
).as(closest)
794+
} else {
795+
Task(
796+
logger.debug(s"Lookup for $target contacting ${contacts.size} new nodes; asked ${asked.size} nodes so far.")
797+
) >>
798+
Task
799+
.parTraverseUnordered(contacts)(fetchNeighbors)
800+
.map(_.flatten.distinct.filterNot(neighbors))
801+
.flatMap(bondNeighbors)
802+
.flatMap { newNeighbors =>
803+
val nextClosest = (closest ++ newNeighbors).take(config.kademliaBucketSize)
804+
val nextAsked = asked ++ contacts
805+
val nextNeighbors = neighbors ++ newNeighbors
806+
val newClosest = nextClosest diff closest
807+
Task(logger.debug(s"Lookup for $target found ${newClosest.size} neighbors closer than before.")) >>
808+
loop(local, nextClosest, nextAsked, nextNeighbors)
809+
}
790810
}
791811
}
792812

793813
init.flatMap {
794814
case (localNode, closestNodes) =>
795-
loop(localNode, closest = SortedSet(closestNodes: _*), asked = Set(localNode))
815+
loop(localNode, closest = SortedSet(closestNodes: _*), asked = Set(localNode), neighbors = closestNodes.toSet)
796816
}
797817
}
798818

@@ -825,7 +845,9 @@ object DiscoveryService {
825845
_ <- Task(
826846
logger.info(s"Successfully enrolled with $enrolled bootstrap nodes. Performing initial lookup...")
827847
)
828-
_ <- lookup(nodeId)
848+
_ <- lookup(nodeId).doOnFinish {
849+
_.fold(Task.unit)(ex => Task(logger.error(s"Error during initial lookup", ex)))
850+
}
829851
nodeCount <- stateRef.get.map(_.nodeMap.size)
830852
_ <- Task(logger.info(s"Discovered $nodeCount nodes by the end of the lookup."))
831853
} yield ()

0 commit comments

Comments
 (0)