Skip to content

[CGKIELE-168] Add support for metrics #450

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 15, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ val dep = {
"org.jline" % "jline" % "3.1.2",
"net.java.dev.jna" % "jna" % "4.5.1",
"org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.5",
"com.github.scopt" %% "scopt" % "3.7.0"
"com.github.scopt" %% "scopt" % "3.7.0",

// Metrics (https://github.com/DataDog/java-dogstatsd-client)
"com.datadoghq" % "java-dogstatsd-client" % "2.5"
)
}

Expand Down
33 changes: 33 additions & 0 deletions src/main/resources/application.conf
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,39 @@ mantis {
port = 8888
}
}

metrics {
# Set to `true` iff your deployment supports metrics collection.
# We push metrics to a StatsD-compatible agent and we use Datadog for collecting them in one place.
# We default to `false` here because we do not expect all deployments to support metrics collection.
enabled = false

# The StatsD-compatible agent host.
host = "localhost"

# The StatsD-compatible agent port.
port = 8125

# All metrics sent will be tagged with the environment.
# Sample values are: `public`, `private`, `production`, `dev`, depending on the use-case.
# If metrics are `enabled`, this is mandatory and must be set explicitly to a non-null value.
#
# environment = null

# All metrics sent will be tagged with the deployment.
# Sample values are: `kevm-testnet`, `iele-testnet`, `raft-kevm`, depending on the use-case.
# If metrics are `enabled`, this is mandatory and must be set explicitly to a non-null value.
#
# deployment = null

# Size of the metrics requests queue.
# If the queue contains that many outstanding requests to the metrics agent, then
# subsequent requests are blocked until the queue has room again.
queue-size = 1024

# Iff true, any errors during metrics client operations will be logged.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Iff - do you mean "if and only if" or is it a typo?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:-D

log-errors = true
}
}

akka {
Expand Down
7 changes: 7 additions & 0 deletions src/main/scala/io/iohk/ethereum/ledger/Ledger.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import io.iohk.ethereum.domain._
import io.iohk.ethereum.ledger.BlockExecutionError.{TxsExecutionError, ValidationBeforeExecError}
import io.iohk.ethereum.ledger.BlockQueue.Leaf
import io.iohk.ethereum.ledger.Ledger._
import io.iohk.ethereum.metrics.{Metrics, MetricsClient}
import io.iohk.ethereum.utils.Config.SyncConfig
import io.iohk.ethereum.utils.{BlockchainConfig, DaoForkConfig, Logger}
import io.iohk.ethereum.vm._
Expand Down Expand Up @@ -160,6 +161,12 @@ class LedgerImpl(
importedBlocks.foreach { b =>
log.debug(s"Imported new block (${b.header.number}: ${Hex.toHexString(b.header.hash.toArray)}) to the top of chain")
}

if(importedBlocks.nonEmpty) {
val maxNumber = importedBlocks.map(_.header.number).max
MetricsClient.get().gauge(Metrics.LedgerImportBlockNumber, maxNumber.toLong)
}

result
}

Expand Down
18 changes: 18 additions & 0 deletions src/main/scala/io/iohk/ethereum/metrics/Metrics.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package io.iohk.ethereum.metrics

object Metrics {
/**
* Signifies that Mantis has started.
*/
final val StartEvent = "start.event"

/**
* Signifies that Mantis has stopped.
*/
final val StopEvent = "stop.event"

/**
* Measures the block number of the last imported block.
*/
final val LedgerImportBlockNumber = "ledger.import.block.number"
}
83 changes: 83 additions & 0 deletions src/main/scala/io/iohk/ethereum/metrics/MetricsClient.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package io.iohk.ethereum.metrics

import java.util.concurrent.atomic.AtomicReference

import com.timgroup.statsd.{NoOpStatsDClient, NonBlockingStatsDClient, StatsDClient}
import com.typesafe.config.Config
import io.iohk.ethereum.utils.Logger

object MetricsClient extends Logger {
private[this] final val NoOpClient = new NoOpStatsDClient
private[this] final val clientRef = new AtomicReference[StatsDClient](NoOpClient)

private[this] def setOnce(client: StatsDClient): Boolean = clientRef.compareAndSet(NoOpClient, client)

/**
* Retrieves the application-wide metrics client.
*/
def get(): StatsDClient = clientRef.get()


/**
* A prefix for all metrics.
*/
final val Prefix = "mantis" // TODO there are several other strings of this value. Can we consolidate?

/**
* Default tags we send to StatsD (actually Datadog).
*
* See https://github.com/input-output-hk/iohk-ops/blob/618748e09035f7bc3e3b055818c0cde4cf1958ce/modules/production.nix#L15
*/
object Tag {
final val Env = "env"
final val Depl = "depl"
}

def mkTag(name: String, value: String): String = s"$name:$value"

/**
* Instantiates and configures the metrics client. This should happen once in the lifetime of the application.
* After this call completes successfully, you can obtain the metrics client by using `MetricsClient.get()`.
*/
def configure(config: MetricsConfig): Unit = {
val enabled = config.enabled

if(enabled) {
val hostname = config.host
val port = config.port
val queueSize = config.queueSize
val logErrors = config.logErrors
val constantTags = Array(
mkTag(Tag.Env, config.environment),
mkTag(Tag.Depl, config.deployment)
)
val errorHandler = if(logErrors) new MetricsErrorHandler else null // null indicates NOOP

val client =
new NonBlockingStatsDClient(
Prefix,
hostname,
port,
queueSize,
constantTags,
errorHandler
)

if(setOnce(client)) {
log.info(s"Configured metrics client: $client")
} else {
log.warn(s"Could not configure metrics client: $client")
client.close()
}
}
}

/**
* Instantiates and configures the metrics client. This should happen once in the lifetime of the application.
* After this call completes successfully, you can obtain the metrics client by using `MetricsClient.get()`.
*/
def configure(mantisConfig: Config): Unit = {
val config = MetricsConfig(mantisConfig)
configure(config)
}
}
51 changes: 51 additions & 0 deletions src/main/scala/io/iohk/ethereum/metrics/MetricsConfig.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package io.iohk.ethereum.metrics

import com.typesafe.config.{Config ⇒ TypesafeConfig}

final case class MetricsConfig(
enabled: Boolean,
host: String,
port: Int,
queueSize: Int,
logErrors: Boolean,
environment: String, // `public`, `private`
deployment: String // `testnet-kevm`, `testnet-iele`
)

object MetricsConfig {
object Keys {
final val Metrics = "metrics"

final val Enabled = "enabled"
final val Host = "host"
final val Port = "port"
final val QueueSize = "queue-size"
final val LogErrors = "log-errors"

final val Environment = "environment"
final val Deployment = "deployment"
}

def apply(mantisConfig: TypesafeConfig): MetricsConfig = {
val config = mantisConfig.getConfig(Keys.Metrics)

val enabled = config.getBoolean(Keys.Enabled)
val host = config.getString(Keys.Host)
val port = config.getInt(Keys.Port)
val queueSize = config.getInt(Keys.QueueSize)
val logErrors = config.getBoolean(Keys.LogErrors)

val environment = config.getString(Keys.Environment)
val deployment = config.getString(Keys.Deployment)

MetricsConfig(
enabled = enabled,
host = host,
port = port,
queueSize = queueSize,
logErrors = logErrors,
environment = environment,
deployment = environment
)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package io.iohk.ethereum.metrics

import com.timgroup.statsd.StatsDClientErrorHandler
import io.iohk.ethereum.utils.Logger

final class MetricsErrorHandler extends StatsDClientErrorHandler with Logger {
def handle(exception: Exception): Unit =
log.error("[" + classOf[StatsDClientErrorHandler].getSimpleName + "]", exception)
}
17 changes: 8 additions & 9 deletions src/main/scala/io/iohk/ethereum/nodebuilder/NodeBuilder.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,26 @@ import io.iohk.ethereum.db.storage.pruning.PruningMode
import io.iohk.ethereum.domain._
import io.iohk.ethereum.jsonrpc.JsonRpcController.JsonRpcConfig
import io.iohk.ethereum.jsonrpc.NetService.NetServiceConfig
import io.iohk.ethereum.ledger._
import io.iohk.ethereum.network.{PeerManagerActor, ServerActor}
import io.iohk.ethereum.jsonrpc._
import io.iohk.ethereum.jsonrpc.server.http.JsonRpcHttpServer
import io.iohk.ethereum.jsonrpc.server.ipc.JsonRpcIpcServer
import io.iohk.ethereum.keystore.{KeyStore, KeyStoreImpl}
import io.iohk.ethereum.ledger.Ledger.VMImpl
import io.iohk.ethereum.network.PeerManagerActor.PeerConfiguration
import io.iohk.ethereum.ledger._
import io.iohk.ethereum.network.EtcPeerManagerActor.PeerInfo
import io.iohk.ethereum.utils._

import scala.concurrent.ExecutionContext.Implicits.global
import io.iohk.ethereum.network._
import io.iohk.ethereum.network.PeerManagerActor.PeerConfiguration
import io.iohk.ethereum.network.discovery.{DiscoveryConfig, DiscoveryListener, PeerDiscoveryManager}
import io.iohk.ethereum.network.handshaker.{EtcHandshaker, EtcHandshakerConfiguration, Handshaker}
import io.iohk.ethereum.network.p2p.EthereumMessageDecoder
import io.iohk.ethereum.network.rlpx.AuthHandshaker
import io.iohk.ethereum.transactions.PendingTransactionsManager
import io.iohk.ethereum.network.{PeerManagerActor, ServerActor, _}
import io.iohk.ethereum.ommers.OmmersPool
import io.iohk.ethereum.testmode.{TestLedgerBuilder, TestmodeConsensusBuilder}
import io.iohk.ethereum.transactions.PendingTransactionsManager
import io.iohk.ethereum.utils.Config.SyncConfig
import io.iohk.ethereum.utils._

import scala.concurrent.ExecutionContext.Implicits.global
import scala.util.{Failure, Success, Try}

// scalastyle:off number.of.types
Expand Down Expand Up @@ -438,7 +436,8 @@ trait SyncControllerBuilder {
ommersPool,
etcPeerManager,
syncConfig,
() => shutdown()), "sync-controller")
() => shutdown()
), "sync-controller")

}

Expand Down
14 changes: 13 additions & 1 deletion src/main/scala/io/iohk/ethereum/nodebuilder/StdNode.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ package io.iohk.ethereum.nodebuilder

import io.iohk.ethereum.blockchain.sync.SyncController
import io.iohk.ethereum.consensus.StdConsensusBuilder
import io.iohk.ethereum.network.{PeerManagerActor, ServerActor}
import io.iohk.ethereum.metrics.{Metrics, MetricsClient}
import io.iohk.ethereum.network.discovery.DiscoveryListener
import io.iohk.ethereum.network.{PeerManagerActor, ServerActor}
import io.iohk.ethereum.testmode.{TestLedgerBuilder, TestmodeConsensusBuilder}
import io.iohk.ethereum.utils.Config

Expand Down Expand Up @@ -50,7 +51,16 @@ abstract class BaseNode extends Node {
if (jsonRpcConfig.ipcServerConfig.enabled) jsonRpcIpcServer.run()
}

private[this] def startMetricsClient(): Unit = {
MetricsClient.configure(Config.config)

// Just produce a point in the graphs to signify Mantis has been (re)started.
MetricsClient.get().gauge(Metrics.StartEvent, 1L)
}

def start(): Unit = {
startMetricsClient()

loadGenesisData()

startPeerManager()
Expand Down Expand Up @@ -81,6 +91,8 @@ abstract class BaseNode extends Node {
if (jsonRpcConfig.ipcServerConfig.enabled) {
tryAndLogFailure(() => jsonRpcIpcServer.close())
}
tryAndLogFailure(() => MetricsClient.get().gauge(Metrics.StopEvent, 1L))
tryAndLogFailure(() => MetricsClient.get().close())
}
}

Expand Down
1 change: 1 addition & 0 deletions src/universal/conf/mantis.conf
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ include "blockchain.conf"
include "sync.conf"
include "misc.conf"
include "consensus.conf"
include "metrics.conf"

# Uncomment the following include to connect to the Ethereum Hard-Fork network.
# Note that any settings in this file will override the ones defined in the files above.
Expand Down
34 changes: 34 additions & 0 deletions src/universal/conf/metrics.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
mantis {
metrics {
# Set to `true` iff your deployment supports metrics collection.
# We push metrics to a StatsD-compatible agent and we use Datadog for collecting them in one place.
# We default to `false` here because we do not expect all deployments to support metrics collection.
# enabled = false

# The StatsD-compatible agent host.
# host = "localhost"

# The StatsD-compatible agent port.
# port = 8125

# All metrics sent will be tagged with the environment.
# Sample values are: `public`, `private`, `production`, `dev`, depending on the use-case.
# If metrics are `enabled`, this is mandatory and must be set explicitly to a non-null value.
#
# environment = null

# All metrics sent will be tagged with the deployment.
# Sample values are: `kevm-testnet`, `iele-testnet`, `raft-kevm`, depending on the use-case.
# If metrics are `enabled`, this is mandatory and must be set explicitly to a non-null value.
#
# deployment = null

# Size of the metrics requests queue.
# If the queue contains that many outstanding requests to the metrics agent, then
# subsequent requests are blocked until the queue has room again.
# queue-size = 1024

# Iff true, any errors during metrics client operations will be logged.
# log-errors = true
}
}
3 changes: 2 additions & 1 deletion verify.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ verifyDependencies in verify ++= Seq(
"io.atomix" % "atomix-storage" sha1 "136f0b221acbc2680f099b8ff3a34f8cc1592fe7",
"io.atomix" % "atomix-primary-backup" sha1 "1c895965e3e67a152ffbccb4283b6cee91b4ea61",
"io.netty" % "netty-tcnative-boringssl-static" sha1 "ff5f2d6db5aaa1b4df1b381382cd6581844aad9d",
"com.github.scopt" % "scopt" sha1 "e078455e1a65597146f8608dab3247bf1eb92e6e"
"com.github.scopt" % "scopt" sha1 "e078455e1a65597146f8608dab3247bf1eb92e6e",
"com.datadoghq" % "java-dogstatsd-client" sha1 "a9380127a42855a76af7787840a3a04b9fc4ce20"
)

verifyOptions in verify := VerifyOptions(
Expand Down