Skip to content

Commit bb5c096

Browse files
authored
Merge pull request #450 from input-output-hk/feature/metrics
[CGKIELE-168] Add support for metrics
2 parents 82dba84 + 7c3b949 commit bb5c096

File tree

12 files changed

+263
-12
lines changed

12 files changed

+263
-12
lines changed

build.sbt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,10 @@ val dep = {
5757
"org.jline" % "jline" % "3.1.2",
5858
"net.java.dev.jna" % "jna" % "4.5.1",
5959
"org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.5",
60-
"com.github.scopt" %% "scopt" % "3.7.0"
60+
"com.github.scopt" %% "scopt" % "3.7.0",
61+
62+
// Metrics (https://github.com/DataDog/java-dogstatsd-client)
63+
"com.datadoghq" % "java-dogstatsd-client" % "2.5"
6164
)
6265
}
6366

src/main/resources/application.conf

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,39 @@ mantis {
547547
port = 8888
548548
}
549549
}
550+
551+
metrics {
552+
# Set to `true` iff your deployment supports metrics collection.
553+
# We push metrics to a StatsD-compatible agent and we use Datadog for collecting them in one place.
554+
# We default to `false` here because we do not expect all deployments to support metrics collection.
555+
enabled = false
556+
557+
# The StatsD-compatible agent host.
558+
host = "localhost"
559+
560+
# The StatsD-compatible agent port.
561+
port = 8125
562+
563+
# All metrics sent will be tagged with the environment.
564+
# Sample values are: `public`, `private`, `production`, `dev`, depending on the use-case.
565+
# If metrics are `enabled`, this is mandatory and must be set explicitly to a non-null value.
566+
#
567+
# environment = null
568+
569+
# All metrics sent will be tagged with the deployment.
570+
# Sample values are: `kevm-testnet`, `iele-testnet`, `raft-kevm`, depending on the use-case.
571+
# If metrics are `enabled`, this is mandatory and must be set explicitly to a non-null value.
572+
#
573+
# deployment = null
574+
575+
# Size of the metrics requests queue.
576+
# If the queue contains that many outstanding requests to the metrics agent, then
577+
# subsequent requests are blocked until the queue has room again.
578+
queue-size = 1024
579+
580+
# Iff true, any errors during metrics client operations will be logged.
581+
log-errors = true
582+
}
550583
}
551584

552585
akka {

src/main/scala/io/iohk/ethereum/ledger/Ledger.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import io.iohk.ethereum.domain._
77
import io.iohk.ethereum.ledger.BlockExecutionError.{TxsExecutionError, ValidationBeforeExecError}
88
import io.iohk.ethereum.ledger.BlockQueue.Leaf
99
import io.iohk.ethereum.ledger.Ledger._
10+
import io.iohk.ethereum.metrics.{Metrics, MetricsClient}
1011
import io.iohk.ethereum.utils.Config.SyncConfig
1112
import io.iohk.ethereum.utils.{BlockchainConfig, DaoForkConfig, Logger}
1213
import io.iohk.ethereum.vm._
@@ -160,6 +161,12 @@ class LedgerImpl(
160161
importedBlocks.foreach { b =>
161162
log.debug(s"Imported new block (${b.header.number}: ${Hex.toHexString(b.header.hash.toArray)}) to the top of chain")
162163
}
164+
165+
if(importedBlocks.nonEmpty) {
166+
val maxNumber = importedBlocks.map(_.header.number).max
167+
MetricsClient.get().gauge(Metrics.LedgerImportBlockNumber, maxNumber.toLong)
168+
}
169+
163170
result
164171
}
165172

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
package io.iohk.ethereum.metrics
2+
3+
object Metrics {
4+
/**
5+
* Signifies that Mantis has started.
6+
*/
7+
final val StartEvent = "start.event"
8+
9+
/**
10+
* Signifies that Mantis has stopped.
11+
*/
12+
final val StopEvent = "stop.event"
13+
14+
/**
15+
* Measures the block number of the last imported block.
16+
*/
17+
final val LedgerImportBlockNumber = "ledger.import.block.number"
18+
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
package io.iohk.ethereum.metrics
2+
3+
import java.util.concurrent.atomic.AtomicReference
4+
5+
import com.timgroup.statsd.{NoOpStatsDClient, NonBlockingStatsDClient, StatsDClient}
6+
import com.typesafe.config.Config
7+
import io.iohk.ethereum.utils.Logger
8+
9+
object MetricsClient extends Logger {
10+
private[this] final val NoOpClient = new NoOpStatsDClient
11+
private[this] final val clientRef = new AtomicReference[StatsDClient](NoOpClient)
12+
13+
private[this] def setOnce(client: StatsDClient): Boolean = clientRef.compareAndSet(NoOpClient, client)
14+
15+
/**
16+
* Retrieves the application-wide metrics client.
17+
*/
18+
def get(): StatsDClient = clientRef.get()
19+
20+
21+
/**
22+
* A prefix for all metrics.
23+
*/
24+
final val Prefix = "mantis" // TODO there are several other strings of this value. Can we consolidate?
25+
26+
/**
27+
* Default tags we send to StatsD (actually Datadog).
28+
*
29+
* See https://github.com/input-output-hk/iohk-ops/blob/618748e09035f7bc3e3b055818c0cde4cf1958ce/modules/production.nix#L15
30+
*/
31+
object Tag {
32+
final val Env = "env"
33+
final val Depl = "depl"
34+
}
35+
36+
def mkTag(name: String, value: String): String = s"$name:$value"
37+
38+
/**
39+
* Instantiates and configures the metrics client. This should happen once in the lifetime of the application.
40+
* After this call completes successfully, you can obtain the metrics client by using `MetricsClient.get()`.
41+
*/
42+
def configure(config: MetricsConfig): Unit = {
43+
val enabled = config.enabled
44+
45+
if(enabled) {
46+
val hostname = config.host
47+
val port = config.port
48+
val queueSize = config.queueSize
49+
val logErrors = config.logErrors
50+
val constantTags = Array(
51+
mkTag(Tag.Env, config.environment),
52+
mkTag(Tag.Depl, config.deployment)
53+
)
54+
val errorHandler = if(logErrors) new MetricsErrorHandler else null // null indicates NOOP
55+
56+
val client =
57+
new NonBlockingStatsDClient(
58+
Prefix,
59+
hostname,
60+
port,
61+
queueSize,
62+
constantTags,
63+
errorHandler
64+
)
65+
66+
if(setOnce(client)) {
67+
log.info(s"Configured metrics client: $client")
68+
} else {
69+
log.warn(s"Could not configure metrics client: $client")
70+
client.close()
71+
}
72+
}
73+
}
74+
75+
/**
76+
* Instantiates and configures the metrics client. This should happen once in the lifetime of the application.
77+
* After this call completes successfully, you can obtain the metrics client by using `MetricsClient.get()`.
78+
*/
79+
def configure(mantisConfig: Config): Unit = {
80+
val config = MetricsConfig(mantisConfig)
81+
configure(config)
82+
}
83+
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package io.iohk.ethereum.metrics
2+
3+
import com.typesafe.config.{Config TypesafeConfig}
4+
5+
final case class MetricsConfig(
6+
enabled: Boolean,
7+
host: String,
8+
port: Int,
9+
queueSize: Int,
10+
logErrors: Boolean,
11+
environment: String, // `public`, `private`
12+
deployment: String // `testnet-kevm`, `testnet-iele`
13+
)
14+
15+
object MetricsConfig {
16+
object Keys {
17+
final val Metrics = "metrics"
18+
19+
final val Enabled = "enabled"
20+
final val Host = "host"
21+
final val Port = "port"
22+
final val QueueSize = "queue-size"
23+
final val LogErrors = "log-errors"
24+
25+
final val Environment = "environment"
26+
final val Deployment = "deployment"
27+
}
28+
29+
def apply(mantisConfig: TypesafeConfig): MetricsConfig = {
30+
val config = mantisConfig.getConfig(Keys.Metrics)
31+
32+
val enabled = config.getBoolean(Keys.Enabled)
33+
val host = config.getString(Keys.Host)
34+
val port = config.getInt(Keys.Port)
35+
val queueSize = config.getInt(Keys.QueueSize)
36+
val logErrors = config.getBoolean(Keys.LogErrors)
37+
38+
val environment = config.getString(Keys.Environment)
39+
val deployment = config.getString(Keys.Deployment)
40+
41+
MetricsConfig(
42+
enabled = enabled,
43+
host = host,
44+
port = port,
45+
queueSize = queueSize,
46+
logErrors = logErrors,
47+
environment = environment,
48+
deployment = environment
49+
)
50+
}
51+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package io.iohk.ethereum.metrics
2+
3+
import com.timgroup.statsd.StatsDClientErrorHandler
4+
import io.iohk.ethereum.utils.Logger
5+
6+
final class MetricsErrorHandler extends StatsDClientErrorHandler with Logger {
7+
def handle(exception: Exception): Unit =
8+
log.error("[" + classOf[StatsDClientErrorHandler].getSimpleName + "]", exception)
9+
}

src/main/scala/io/iohk/ethereum/nodebuilder/NodeBuilder.scala

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,28 +15,26 @@ import io.iohk.ethereum.db.storage.pruning.PruningMode
1515
import io.iohk.ethereum.domain._
1616
import io.iohk.ethereum.jsonrpc.JsonRpcController.JsonRpcConfig
1717
import io.iohk.ethereum.jsonrpc.NetService.NetServiceConfig
18-
import io.iohk.ethereum.ledger._
19-
import io.iohk.ethereum.network.{PeerManagerActor, ServerActor}
2018
import io.iohk.ethereum.jsonrpc._
2119
import io.iohk.ethereum.jsonrpc.server.http.JsonRpcHttpServer
2220
import io.iohk.ethereum.jsonrpc.server.ipc.JsonRpcIpcServer
2321
import io.iohk.ethereum.keystore.{KeyStore, KeyStoreImpl}
2422
import io.iohk.ethereum.ledger.Ledger.VMImpl
25-
import io.iohk.ethereum.network.PeerManagerActor.PeerConfiguration
23+
import io.iohk.ethereum.ledger._
2624
import io.iohk.ethereum.network.EtcPeerManagerActor.PeerInfo
27-
import io.iohk.ethereum.utils._
28-
29-
import scala.concurrent.ExecutionContext.Implicits.global
30-
import io.iohk.ethereum.network._
25+
import io.iohk.ethereum.network.PeerManagerActor.PeerConfiguration
3126
import io.iohk.ethereum.network.discovery.{DiscoveryConfig, DiscoveryListener, PeerDiscoveryManager}
3227
import io.iohk.ethereum.network.handshaker.{EtcHandshaker, EtcHandshakerConfiguration, Handshaker}
3328
import io.iohk.ethereum.network.p2p.EthereumMessageDecoder
3429
import io.iohk.ethereum.network.rlpx.AuthHandshaker
35-
import io.iohk.ethereum.transactions.PendingTransactionsManager
30+
import io.iohk.ethereum.network.{PeerManagerActor, ServerActor, _}
3631
import io.iohk.ethereum.ommers.OmmersPool
3732
import io.iohk.ethereum.testmode.{TestLedgerBuilder, TestmodeConsensusBuilder}
33+
import io.iohk.ethereum.transactions.PendingTransactionsManager
3834
import io.iohk.ethereum.utils.Config.SyncConfig
35+
import io.iohk.ethereum.utils._
3936

37+
import scala.concurrent.ExecutionContext.Implicits.global
4038
import scala.util.{Failure, Success, Try}
4139

4240
// scalastyle:off number.of.types
@@ -438,7 +436,8 @@ trait SyncControllerBuilder {
438436
ommersPool,
439437
etcPeerManager,
440438
syncConfig,
441-
() => shutdown()), "sync-controller")
439+
() => shutdown()
440+
), "sync-controller")
442441

443442
}
444443

src/main/scala/io/iohk/ethereum/nodebuilder/StdNode.scala

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@ package io.iohk.ethereum.nodebuilder
22

33
import io.iohk.ethereum.blockchain.sync.SyncController
44
import io.iohk.ethereum.consensus.StdConsensusBuilder
5-
import io.iohk.ethereum.network.{PeerManagerActor, ServerActor}
5+
import io.iohk.ethereum.metrics.{Metrics, MetricsClient}
66
import io.iohk.ethereum.network.discovery.DiscoveryListener
7+
import io.iohk.ethereum.network.{PeerManagerActor, ServerActor}
78
import io.iohk.ethereum.testmode.{TestLedgerBuilder, TestmodeConsensusBuilder}
89
import io.iohk.ethereum.utils.Config
910

@@ -50,7 +51,16 @@ abstract class BaseNode extends Node {
5051
if (jsonRpcConfig.ipcServerConfig.enabled) jsonRpcIpcServer.run()
5152
}
5253

54+
private[this] def startMetricsClient(): Unit = {
55+
MetricsClient.configure(Config.config)
56+
57+
// Just produce a point in the graphs to signify Mantis has been (re)started.
58+
MetricsClient.get().gauge(Metrics.StartEvent, 1L)
59+
}
60+
5361
def start(): Unit = {
62+
startMetricsClient()
63+
5464
loadGenesisData()
5565

5666
startPeerManager()
@@ -81,6 +91,8 @@ abstract class BaseNode extends Node {
8191
if (jsonRpcConfig.ipcServerConfig.enabled) {
8292
tryAndLogFailure(() => jsonRpcIpcServer.close())
8393
}
94+
tryAndLogFailure(() => MetricsClient.get().gauge(Metrics.StopEvent, 1L))
95+
tryAndLogFailure(() => MetricsClient.get().close())
8496
}
8597
}
8698

src/universal/conf/mantis.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ include "blockchain.conf"
1414
include "sync.conf"
1515
include "misc.conf"
1616
include "consensus.conf"
17+
include "metrics.conf"
1718

1819
# Uncomment the following include to connect to the Ethereum Hard-Fork network.
1920
# Note that any settings in this file will override the ones defined in the files above.

src/universal/conf/metrics.conf

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
mantis {
2+
metrics {
3+
# Set to `true` iff your deployment supports metrics collection.
4+
# We push metrics to a StatsD-compatible agent and we use Datadog for collecting them in one place.
5+
# We default to `false` here because we do not expect all deployments to support metrics collection.
6+
# enabled = false
7+
8+
# The StatsD-compatible agent host.
9+
# host = "localhost"
10+
11+
# The StatsD-compatible agent port.
12+
# port = 8125
13+
14+
# All metrics sent will be tagged with the environment.
15+
# Sample values are: `public`, `private`, `production`, `dev`, depending on the use-case.
16+
# If metrics are `enabled`, this is mandatory and must be set explicitly to a non-null value.
17+
#
18+
# environment = null
19+
20+
# All metrics sent will be tagged with the deployment.
21+
# Sample values are: `kevm-testnet`, `iele-testnet`, `raft-kevm`, depending on the use-case.
22+
# If metrics are `enabled`, this is mandatory and must be set explicitly to a non-null value.
23+
#
24+
# deployment = null
25+
26+
# Size of the metrics requests queue.
27+
# If the queue contains that many outstanding requests to the metrics agent, then
28+
# subsequent requests are blocked until the queue has room again.
29+
# queue-size = 1024
30+
31+
# Iff true, any errors during metrics client operations will be logged.
32+
# log-errors = true
33+
}
34+
}

verify.sbt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ verifyDependencies in verify ++= Seq(
9292
"io.atomix" % "atomix-storage" sha1 "136f0b221acbc2680f099b8ff3a34f8cc1592fe7",
9393
"io.atomix" % "atomix-primary-backup" sha1 "1c895965e3e67a152ffbccb4283b6cee91b4ea61",
9494
"io.netty" % "netty-tcnative-boringssl-static" sha1 "ff5f2d6db5aaa1b4df1b381382cd6581844aad9d",
95-
"com.github.scopt" % "scopt" sha1 "e078455e1a65597146f8608dab3247bf1eb92e6e"
95+
"com.github.scopt" % "scopt" sha1 "e078455e1a65597146f8608dab3247bf1eb92e6e",
96+
"com.datadoghq" % "java-dogstatsd-client" sha1 "a9380127a42855a76af7787840a3a04b9fc4ce20"
9697
)
9798

9899
verifyOptions in verify := VerifyOptions(

0 commit comments

Comments
 (0)