From 74c46deea882737428febc7b625cb904ee681c9b Mon Sep 17 00:00:00 2001
From: DiCanio
Date: Fri, 11 Oct 2019 17:43:05 +0200
Subject: [PATCH 1/2] Add Transactor Metrics

First bunch of transactor metrics. Currently doesn't treat all metrics
handed over by the transactor.

resolves #2
---
 src/datomic_tx_metrics/core.clj | 307 +++++++++++++++++++++++++++++++-
 1 file changed, 304 insertions(+), 3 deletions(-)

diff --git a/src/datomic_tx_metrics/core.clj b/src/datomic_tx_metrics/core.clj
index 02284b2..2319d7c 100644
--- a/src/datomic_tx_metrics/core.clj
+++ b/src/datomic_tx_metrics/core.clj
@@ -2,6 +2,7 @@
   (:require
     [aleph.http :as http]
     [bidi.ring :as bidi]
+    [clojure.string :as string]
     [environ.core :refer [env]]
     [prometheus.alpha :as prom]
     [taoensso.timbre :as log])
@@ -13,6 +14,154 @@
 
 ;; ---- Metrics ----------------------------------------------------------------
 
+(prom/defgauge alarms
+  "Number of alarms/problems that have occurred."
+  {:namespace "datomic"})
+
+(prom/defgauge alarms-indexing-job-failed
+  "Number of alarms related to the indexing job."
+  {:namespace "datomic"})
+
+(prom/defgauge alarms-backpressure
+  "Number of alarms related to the transactor using back pressure."
+  {:namespace "datomic"})
+
+(prom/defgauge alarms-unhandled-exception
+  "Number of alarms related to unhandled exceptions."
+  {:namespace "datomic"})
+
+(prom/defgauge alarms-other
+  "Number of alarms that are not related to any other specific alarm metric."
+  {:namespace "datomic"})
+
+(prom/defgauge available-ram-megabytes
+  "Unused RAM on transactor in MB."
+  {:namespace "datomic"})
+
+(prom/defgauge object-cache-size
+  "Number of segments in the Datomic object cache."
+  {:namespace "datomic"})
+
+(prom/defgauge remote-peers
+  "Number of remote peers connected."
+  {:namespace "datomic"})
+
+(prom/defgauge successful-metric-reports
+  "Number of successful metric reports over a 1 min period."
+  {:namespace "datomic"})
+
+(prom/defcounter transacted-datoms-total
+  "Number of transacted datoms."
+  {:namespace "datomic"})
+
+(prom/defcounter transactions-total
+  "Total number of transactions."
+  {:namespace "datomic"})
+
+(prom/defgauge transactions-batch
+  "Number of transactions batched into a single write to the log."
+  {:namespace "datomic"})
+
+(prom/defcounter transacted-bytes-total
+  "Total volume of transaction data to log, peers in bytes."
+  {:namespace "datomic"})
+
+(prom/defcounter transactions-msec-total
+  "Total time of transactions in msec."
+  {:namespace "datomic"})
+
+(prom/defcounter transactions-add-fulltext-msec-total
+  "Total time of transactions spent to add fulltext."
+  {:namespace "datomic"})
+
+(prom/defcounter transactions-write-log-msec-total
+  "Total time of transactions spent writing to log per transaction batch."
+  {:namespace "datomic"})
+
+(prom/defgauge datoms
+  "Number of unique datoms in the index."
+  {:namespace "datomic"})
+
+(prom/defgauge index-datoms
+  "Number of datoms stored by the index, all sorts."
+  {:namespace "datomic"})
+
+(prom/defgauge index-segments
+  "Number of segments in the index."
+  {:namespace "datomic"})
+
+(prom/defgauge index-writes
+  "Number of segments written by indexing job, reported at end."
+  {:namespace "datomic"})
+
+(prom/defgauge index-writes-msec
+  "Time per index segment write."
+  {:namespace "datomic"})
+
+(prom/defgauge index-creation-msec
+  "Time to create index in msec, reported at end of indexing job."
+  {:namespace "datomic"})
+
+(prom/defgauge index-fulltext-creation-msec
+  "Time to create fulltext portion of index in msec."
+  {:namespace "datomic"})
+
+(prom/defgauge memory-index-consumed-megabytes
+  "RAM consumed by memory index in MB."
+  {:namespace "datomic"})
+
+(prom/defgauge memory-index-fill-msec
+  "Estimate of the time to fill the memory index, given the current write load."
+  {:namespace "datomic"})
+
+(prom/defcounter storage-write-operations-total
+  "Total number of storage write operations."
+  {:namespace "datomic"})
+
+(prom/defcounter storage-write-bytes-total
+  "Total number of bytes written to storage."
+  {:namespace "datomic"})
+
+(prom/defgauge storage-write-msec
+  "Time spent writing to storage."
+  {:namespace "datomic"})
+
+(prom/defcounter storage-read-operations-total
+  "Total number of storage read operations."
+  {:namespace "datomic"})
+
+(prom/defcounter storage-read-bytes-total
+  "Total number of bytes read from storage."
+  {:namespace "datomic"})
+
+(prom/defgauge storage-read-msec
+  "Time spent reading from storage."
+  {:namespace "datomic"})
+
+(prom/defgauge storage-backoff-msec
+  "Time spent in backoff/retry around calls to storage."
+  {:namespace "datomic"})
+
+(prom/defcounter storage-backoff-retries-total
+  "Total number of retried storage operations."
+  {:namespace "datomic"})
+
+(prom/defgauge object-cache-hits-ratio
+  "Datomic object cache hit ratio."
+  {:namespace "datomic"})
+
+(prom/defgauge garbage-segments
+  "Number of garbage segments created."
+  {:namespace "datomic"})
+
+(prom/defgauge heartbeats-msec
+  "Time spent writing to storage as part of the heartbeat (transactor writes location)."
+  {:namespace "datomic"})
+
+(prom/defgauge heartbeats
+  "Number of heartbeats."
+  {:namespace "datomic"})
+
 (def ^:private metrics-registry
   (doto (CollectorRegistry. true)
     (.register (StandardExports.))
@@ -20,7 +169,45 @@
     (.register (GarbageCollectorExports.))
     (.register (ThreadExports.))
     (.register (ClassLoadingExports.))
-    (.register (VersionInfoExports.))))
+    (.register (VersionInfoExports.))
+    (.register alarms)
+    (.register alarms-indexing-job-failed)
+    (.register alarms-backpressure)
+    (.register alarms-unhandled-exception)
+    (.register alarms-other)
+    (.register available-ram-megabytes)
+    (.register object-cache-size)
+    (.register remote-peers)
+    (.register successful-metric-reports)
+    (.register transacted-datoms-total)
+    (.register transactions-total)
+    (.register transactions-batch)
+    (.register transacted-bytes-total)
+    (.register transactions-msec-total)
+    (.register transactions-add-fulltext-msec-total)
+    (.register transactions-write-log-msec-total)
+    (.register datoms)
+    (.register index-datoms)
+    (.register index-segments)
+    (.register index-writes)
+    (.register index-writes-msec)
+    (.register index-creation-msec)
+    (.register index-fulltext-creation-msec)
+    (.register memory-index-consumed-megabytes)
+    (.register memory-index-fill-msec)
+    (.register storage-write-operations-total)
+    (.register storage-write-bytes-total)
+    (.register storage-write-msec)
+    (.register storage-read-operations-total)
+    (.register storage-read-bytes-total)
+    (.register storage-read-msec)
+    (.register storage-backoff-msec)
+    (.register storage-backoff-retries-total)
+    (.register object-cache-hits-ratio)
+    (.register garbage-segments)
+    (.register heartbeats-msec)
+    (.register heartbeats)
+    ))
 
 
 ;; ---- Callback ---------------------------------------------------------------
@@ -28,9 +215,123 @@
 (defn tx-metrics-callback-handler
   "Called by Datomic transactor transferring its metrics."
   [tx-metrics]
-  (doseq [[name value] tx-metrics]
-    (log/info "Metric: " name " with value: " value)))
+  ;; Bind the metric value to a name other than `alarms`; otherwise the local
+  ;; would shadow the gauge and `prom/set!` would be handed the metric value.
+  (when-let [alarm-metric (:Alarm tx-metrics)]
+    (prom/set! alarms (count (keys alarm-metric))))
+
+  (when-let [{:keys [sum]} (:AlarmIndexingJobFailed tx-metrics)]
+    (prom/set! alarms-indexing-job-failed sum))
+
+  (when-let [{:keys [sum]} (:AlarmBackPressure tx-metrics)]
+    (prom/set! alarms-backpressure sum))
+
+  (when-let [{:keys [sum]} (:AlarmUnhandledException tx-metrics)]
+    (prom/set! alarms-unhandled-exception sum))
+
+  ;; Sum up all remaining "Alarm*" metrics that have no dedicated gauge.
+  ;; Iterates over the map entries (not just the keys) so that :sum is
+  ;; actually available for each matching metric.
+  (->> tx-metrics
+       (filter
+         (fn [[metric-key _]]
+           (and (string/starts-with? (name metric-key) "Alarm")
+                (not= metric-key :Alarm)
+                (not= metric-key :AlarmIndexingJobFailed)
+                (not= metric-key :AlarmBackPressure)
+                (not= metric-key :AlarmUnhandledException))))
+       (reduce
+         (fn [total [_ {:keys [sum]}]]
+           (+ total (or sum 0)))
+         0)
+       (prom/set! alarms-other))
+
+  (when-let [mb (:AvailableMB tx-metrics)]
+    (prom/set! available-ram-megabytes mb))
+
+  (when-let [size (:ObjectCacheCount tx-metrics)]
+    (prom/set! object-cache-size size))
+
+  (when-let [{:keys [sum]} (:RemotePeers tx-metrics)]
+    (prom/set! remote-peers sum))
+
+  (when-let [{:keys [sum]} (:MetricsReport tx-metrics)]
+    (prom/set! successful-metric-reports sum))
+
+  (when-let [{:keys [sum count]} (:TransactionDatoms tx-metrics)]
+    (prom/inc! transacted-datoms-total sum)
+    (prom/inc! transactions-total count))
+
+  (when-let [{:keys [count]} (:TransactionBatch tx-metrics)]
+    (prom/set! transactions-batch count))
+
+  (when-let [{:keys [sum]} (:TransactionBytes tx-metrics)]
+    (prom/inc! transacted-bytes-total sum))
+
+  (when-let [{:keys [sum]} (:TransactionMsec tx-metrics)]
+    (prom/inc! transactions-msec-total sum))
+
+  (when-let [{:keys [sum]} (:DbAddFulltextMsec tx-metrics)]
+    (prom/inc! transactions-add-fulltext-msec-total sum))
+
+  (when-let [{:keys [sum]} (:LogWriteMsec tx-metrics)]
+    (prom/inc! transactions-write-log-msec-total sum))
+
+  (when-let [{:keys [sum]} (:Datoms tx-metrics)]
+    (prom/set! datoms sum))
+
+  (when-let [{:keys [sum]} (:IndexDatoms tx-metrics)]
+    (prom/set! index-datoms sum))
+
+  (when-let [{:keys [sum]} (:IndexSegments tx-metrics)]
+    (prom/set! index-segments sum))
+
+  (when-let [{:keys [sum]} (:IndexWrites tx-metrics)]
+    (prom/set! index-writes sum))
+
+  (when-let [{:keys [sum]} (:IndexWriteMsec tx-metrics)]
+    (prom/set! index-writes-msec sum))
+
+  (when-let [{:keys [sum]} (:CreateEntireIndexMsec tx-metrics)]
+    (prom/set! index-creation-msec sum))
+
+  (when-let [{:keys [sum]} (:CreateFulltextIndexMsec tx-metrics)]
+    (prom/set! index-fulltext-creation-msec sum))
+
+  (when-let [{:keys [sum]} (:MemoryIndexMB tx-metrics)]
+    (prom/set! memory-index-consumed-megabytes sum))
+
+  (when-let [{:keys [sum]} (:MemoryIndexFillMsec tx-metrics)]
+    (prom/set! memory-index-fill-msec sum))
+
+  (when-let [{:keys [sum count]} (:StoragePutBytes tx-metrics)]
+    (prom/inc! storage-write-operations-total count)
+    (prom/inc! storage-write-bytes-total sum))
+
+  (when-let [{:keys [sum]} (:StoragePutMsec tx-metrics)]
+    (prom/set! storage-write-msec sum))
+
+  (when-let [{:keys [sum count]} (:StorageGetBytes tx-metrics)]
+    (prom/inc! storage-read-operations-total count)
+    (prom/inc! storage-read-bytes-total sum))
+
+  (when-let [{:keys [sum]} (:StorageGetMsec tx-metrics)]
+    (prom/set! storage-read-msec sum))
+
+  (when-let [{:keys [sum count]} (:StorageBackoff tx-metrics)]
+    (prom/set! storage-backoff-msec sum)
+    (prom/inc! storage-backoff-retries-total count))
+
+  (when-let [{:keys [sum count]} (:ObjectCache tx-metrics)]
+    (prom/set! object-cache-hits-ratio (/ (double sum) count)))
+
+  (when-let [{:keys [sum]} (:GarbageSegments tx-metrics)]
+    (prom/set! garbage-segments sum))
 
+  (when-let [{:keys [sum count]} (:HeartbeatMsec tx-metrics)]
+    (prom/set! heartbeats-msec sum)
+    (prom/set! heartbeats count))
+  )
 
 ;; ---- Server -----------------------------------------------------------------
 

From 97eff59a48e6f2e5991f999e7fc6d80234fe1dd4 Mon Sep 17 00:00:00 2001
From: DiCanio
Date: Tue, 15 Oct 2019 19:04:30 +0200
Subject: [PATCH 2/2] Add Documentation

---
 README.md                             | 346 +++++++++++++++++++++++++-
 examples/docker/Dockerfile            |  36 +++
 examples/docker/README.md             |  56 +++++
 examples/docker/logback.xml           |  57 +++++
 examples/docker/start.sh              |  21 ++
 examples/docker/transactor.properties | 127 ++++++++++
 6 files changed, 642 insertions(+), 1 deletion(-)
 create mode 100644 examples/docker/Dockerfile
 create mode 100644 examples/docker/README.md
 create mode 100644 examples/docker/logback.xml
 create mode 100644 examples/docker/start.sh
 create mode 100644 examples/docker/transactor.properties

diff --git a/README.md b/README.md
index da3ac30..5267fc5 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,346 @@
 # datomic-tx-metrics
-Collecting Datomic Transactor + JVM metrics for consumption using a web endpoint (e.g. by Prometheus).
+Collecting Datomic Transactor + JVM metrics for consumption by [Prometheus](https://prometheus.io/) by offering a web endpoint.
+
+## How does it work?
+
+### Registering the metrics collector at the transactor.
+Add the following line to your `transactor.properties` file:
+
+```
+metrics-callback=datomic-tx-metrics.core/tx-metrics-callback-handler
+```
+
+Next ensure that the JAR (you can download one in the release section) is present within Datomic's `/lib` directory to be loaded at runtime. An example leveraging a Docker container can be found in the examples section of this repository.
+ +### Configuring the metrics collector + +* specify the port of the web server fired up by the metrics collector using the environment variable `METRICS_PORT` when starting the transactor (defaults to _11509_) + +### Scraping Metrics + +* Metrics collector fires up a web server when loaded by the transactor +* Metrics are typically sent from the transactor to the callback function every `10s` (keep this in mind since values might not change over this duration). +* Scrape the collected metrics by requesting the started metrics web server under the `/metrics` endpoint + +The following is an exemplary Prometheus configuration file for scraping the metrics endpoint: + +```yaml +global: + scrape_interval: 15s + scrape_timeout: 10s + evaluation_interval: 15s +alerting: + alertmanagers: + - static_configs: + - targets: [] + scheme: http + timeout: 10s + api_version: v1 +scrape_configs: +- job_name: prometheus + honor_timestamps: true + scrape_interval: 15s + scrape_timeout: 10s + metrics_path: /metrics + scheme: http + static_configs: + - targets: + - localhost:9090 +- job_name: datomic-tx-metrics + scrape_interval: 10s + scrape_timeout: 5s + metrics_path: /metrics + scheme: http + static_configs: + - targets: + - localhost:11509 +``` +_**Note:** adjust the target according to your own deployment state._ + + +## What JVM metrics are covered? + +The following JVM metrics are covered as defined by [Prometheus Hotspot](https://github.com/prometheus/client_java/tree/parent-0.5.0/simpleclient_hotspot/src/main/java/io/prometheus/client/hotspot): + +* Standard Exports +* MemoryPoolsExports +* GarbageCollectorExports +* ThreadExports +* ClassLoadingExports +* VersionInfoExports + +## What Datomic transactor metrics are covered? 
+ +The following CloudWatch metrics that can be created by the transactor are supported: + +- [x] Alarm +- [x] AlarmIndexingJobFailed +- [x] AlarmBackPressure +- [x] AlarmUnhandledException +- [x] Alarm{AnythingElse} +- [x] AvailableMB +- [ ] ClusterCreateFS +- [x] CreateEntireIndexMsec +- [x] CreateFulltextIndexMsec +- [x] Datoms +- [x] DbAddFulltextMsec +- [ ] FulltextSegments +- [x] GarbageSegments +- [x] HeartbeatMsec +- [x] HeartbeatMsec (samples) +- [ ] HeartMonitorMsec +- [x] IndexDatoms +- [x] IndexSegments +- [x] IndexWrites +- [x] IndexWriteMsec +- [ ] LogIngestBytes +- [ ] LogIngestMsec +- [x] LogWriteMsec +- [ ] Memcache +- [x] MemoryIndexMB +- [x] MetricsReport +- [ ] ObjectCache +- [ ] MemcachedPutMsec +- [ ] MemcachedPutFailedMsec +- [x] RemotePeers +- [x] StorageBackoff (total time per transactor metric report) +- [x] StorageBackoff (total number of retries) +- [x] Storage{Get,Put}Bytes (throughput per transactor metric report) +- [x] Storage{Get,Put}Bytes (operations count per transactor metric report) +- [x] Storage{Get,Put}Msec +- [x] TransactionBatch +- [x] TransactionBytes (total volume of transaction data to log, peers) +- [x] TransactionDatoms (total datoms transacted) +- [x] TransactionDatoms (total transactions) +- [x] TransactionMsec (total time spent on transactions) +- [ ] Valcache +- [ ] ValcachePutMsec +- [ ] ValcachePutFailedMsec + +The following additional metrics are calculated based on the metrics stated above: + +- [x] Object Cache Hit Ratio + + +## Example Metrics + +``` +# HELP datomic_remote_peers Number of remote peers connected. +# TYPE datomic_remote_peers gauge +datomic_remote_peers 1.0 +# HELP datomic_index_segments Number of segments in the index. +# TYPE datomic_index_segments gauge +datomic_index_segments 30732.0 +# HELP datomic_index_creation_msec Time to create index in msec, reported at end of indexing job. 
+# TYPE datomic_index_creation_msec gauge +datomic_index_creation_msec 29960.0 +# HELP datomic_storage_backoff_retries_total Total number of retried storage operations. +# TYPE datomic_storage_backoff_retries_total counter +datomic_storage_backoff_retries_total 0.0 +# HELP datomic_heartbeats Number of heartbeats. +# TYPE datomic_heartbeats gauge +datomic_heartbeats 12.0 +# HELP datomic_storage_read_msec Time spent reading from storage. +# TYPE datomic_storage_read_msec gauge +datomic_storage_read_msec 4.0 +# HELP datomic_transactions_total Total number of transactions. +# TYPE datomic_transactions_total counter +datomic_transactions_total 1789.0 +# HELP datomic_index_fulltext_creation_msec Time to create fulltext portion of index in msec. +# TYPE datomic_index_fulltext_creation_msec gauge +datomic_index_fulltext_creation_msec 0.0 +# HELP datomic_storage_write_bytes_total Total number of bytes written to storage. +# TYPE datomic_storage_write_bytes_total counter +datomic_storage_write_bytes_total 2.14121767E8 +# HELP datomic_transactions_add_fulltext_msec_total Total time of transactions spent to add fulltext. +# TYPE datomic_transactions_add_fulltext_msec_total counter +datomic_transactions_add_fulltext_msec_total 161.0 +# HELP datomic_object_cache_hits_ratio Datomic object cache hit ratio. +# TYPE datomic_object_cache_hits_ratio gauge +datomic_object_cache_hits_ratio 0.9966473960079482 +# HELP datomic_transactions_write_log_msec_total Total time of transactions spent writing to log per transaction batch. +# TYPE datomic_transactions_write_log_msec_total counter +datomic_transactions_write_log_msec_total 12408.0 +# HELP datomic_alarms_indexing_job_failed Number of alarms related to the indexing job. +# TYPE datomic_alarms_indexing_job_failed gauge +datomic_alarms_indexing_job_failed 0.0 +# HELP datomic_transacted_bytes_total Total volume of transaction data to log, peers in bytes. 
+# TYPE datomic_transacted_bytes_total counter +datomic_transacted_bytes_total 8.1213863E7 +# HELP datomic_alarms_backpressure Number of alarms related to the transactor using back pressure. +# TYPE datomic_alarms_backpressure gauge +datomic_alarms_backpressure 0.0 +# HELP datomic_transactions_batch Number of transactions batched into a single write to the log. +# TYPE datomic_transactions_batch gauge +datomic_transactions_batch 356.0 +# HELP datomic_heartbeats_msec Time spent writing to storage as part of the heartbeat (transactor writes location). +# TYPE datomic_heartbeats_msec gauge +datomic_heartbeats_msec 60004.0 +# HELP datomic_transacted_datoms_total Number of transacted datoms. +# TYPE datomic_transacted_datoms_total counter +datomic_transacted_datoms_total 4690819.0 +# HELP datomic_index_datoms Number of datoms stored by the index, all sorts. +# TYPE datomic_index_datoms gauge +datomic_index_datoms 1.58230545E8 +# HELP datomic_object_cache_size Number of segments in the Datomic object cache. +# TYPE datomic_object_cache_size gauge +datomic_object_cache_size 4431.0 +# HELP datomic_storage_write_operations_total Total number of storage write operations. +# TYPE datomic_storage_write_operations_total counter +datomic_storage_write_operations_total 13143.0 +# HELP datomic_transactions_msec_total Total time of transactions in msec. 
+# TYPE datomic_transactions_msec_total counter +datomic_transactions_msec_total 1113417.0 +# HELP jvm_info JVM version info +# TYPE jvm_info gauge +jvm_info{version="1.8.0_222-b10",vendor="Oracle Corporation",runtime="OpenJDK Runtime Environment",} 1.0 +# HELP jvm_threads_current Current thread count of a JVM +# TYPE jvm_threads_current gauge +jvm_threads_current 70.0 +# HELP jvm_threads_daemon Daemon thread count of a JVM +# TYPE jvm_threads_daemon gauge +jvm_threads_daemon 33.0 +# HELP jvm_threads_peak Peak thread count of a JVM +# TYPE jvm_threads_peak gauge +jvm_threads_peak 73.0 +# HELP jvm_threads_started_total Started thread count of a JVM +# TYPE jvm_threads_started_total counter +jvm_threads_started_total 93.0 +# HELP jvm_threads_deadlocked Cycles of JVM-threads that are in deadlock waiting to acquire object monitors or ownable synchronizers +# TYPE jvm_threads_deadlocked gauge +jvm_threads_deadlocked 0.0 +# HELP jvm_threads_deadlocked_monitor Cycles of JVM-threads that are in deadlock waiting to acquire object monitors +# TYPE jvm_threads_deadlocked_monitor gauge +jvm_threads_deadlocked_monitor 0.0 +# HELP datomic_index_writes Number of segments written by indexing job, reported at end. +# TYPE datomic_index_writes gauge +datomic_index_writes 2310.0 +# HELP datomic_available_ram_megabytes Unused RAM on transactor in MB. +# TYPE datomic_available_ram_megabytes gauge +datomic_available_ram_megabytes 1480.0 +# HELP datomic_storage_backoff_msec Time spent in backoff/retry around calls to storage. +# TYPE datomic_storage_backoff_msec gauge +datomic_storage_backoff_msec 0.0 +# HELP jvm_memory_bytes_used Used bytes of a given JVM memory area. +# TYPE jvm_memory_bytes_used gauge +jvm_memory_bytes_used{area="heap",} 2.806176568E9 +jvm_memory_bytes_used{area="nonheap",} 1.57299624E8 +# HELP jvm_memory_bytes_committed Committed (bytes) of a given JVM memory area. 
+# TYPE jvm_memory_bytes_committed gauge +jvm_memory_bytes_committed{area="heap",} 4.132962304E9 +jvm_memory_bytes_committed{area="nonheap",} 1.79191808E8 +# HELP jvm_memory_bytes_max Max (bytes) of a given JVM memory area. +# TYPE jvm_memory_bytes_max gauge +jvm_memory_bytes_max{area="heap",} 4.132962304E9 +jvm_memory_bytes_max{area="nonheap",} -1.0 +# HELP jvm_memory_bytes_init Initial bytes of a given JVM memory area. +# TYPE jvm_memory_bytes_init gauge +jvm_memory_bytes_init{area="heap",} 4.294967296E9 +jvm_memory_bytes_init{area="nonheap",} 2555904.0 +# HELP jvm_memory_pool_bytes_used Used bytes of a given JVM memory pool. +# TYPE jvm_memory_pool_bytes_used gauge +jvm_memory_pool_bytes_used{pool="Code Cache",} 3.6131072E7 +jvm_memory_pool_bytes_used{pool="Metaspace",} 1.00296496E8 +jvm_memory_pool_bytes_used{pool="Compressed Class Space",} 2.0872056E7 +jvm_memory_pool_bytes_used{pool="PS Eden Space",} 8.73379536E8 +jvm_memory_pool_bytes_used{pool="PS Survivor Space",} 1.47745544E8 +jvm_memory_pool_bytes_used{pool="PS Old Gen",} 1.785055616E9 +# HELP jvm_memory_pool_bytes_committed Committed bytes of a given JVM memory pool. +# TYPE jvm_memory_pool_bytes_committed gauge +jvm_memory_pool_bytes_committed{pool="Code Cache",} 3.6503552E7 +jvm_memory_pool_bytes_committed{pool="Metaspace",} 1.16301824E8 +jvm_memory_pool_bytes_committed{pool="Compressed Class Space",} 2.6386432E7 +jvm_memory_pool_bytes_committed{pool="PS Eden Space",} 1.106771968E9 +jvm_memory_pool_bytes_committed{pool="PS Survivor Space",} 1.6252928E8 +jvm_memory_pool_bytes_committed{pool="PS Old Gen",} 2.863661056E9 +# HELP jvm_memory_pool_bytes_max Max bytes of a given JVM memory pool. 
+# TYPE jvm_memory_pool_bytes_max gauge +jvm_memory_pool_bytes_max{pool="Code Cache",} 2.5165824E8 +jvm_memory_pool_bytes_max{pool="Metaspace",} -1.0 +jvm_memory_pool_bytes_max{pool="Compressed Class Space",} 1.073741824E9 +jvm_memory_pool_bytes_max{pool="PS Eden Space",} 1.106771968E9 +jvm_memory_pool_bytes_max{pool="PS Survivor Space",} 1.6252928E8 +jvm_memory_pool_bytes_max{pool="PS Old Gen",} 2.863661056E9 +# HELP jvm_memory_pool_bytes_init Initial bytes of a given JVM memory pool. +# TYPE jvm_memory_pool_bytes_init gauge +jvm_memory_pool_bytes_init{pool="Code Cache",} 2555904.0 +jvm_memory_pool_bytes_init{pool="Metaspace",} 0.0 +jvm_memory_pool_bytes_init{pool="Compressed Class Space",} 0.0 +jvm_memory_pool_bytes_init{pool="PS Eden Space",} 1.073741824E9 +jvm_memory_pool_bytes_init{pool="PS Survivor Space",} 1.78782208E8 +jvm_memory_pool_bytes_init{pool="PS Old Gen",} 2.863661056E9 +# HELP datomic_successful_metric_reports Number of successful metric reports over a 1 min period. +# TYPE datomic_successful_metric_reports gauge +datomic_successful_metric_reports 1.0 +# HELP jvm_classes_loaded The number of classes that are currently loaded in the JVM +# TYPE jvm_classes_loaded gauge +jvm_classes_loaded 19802.0 +# HELP jvm_classes_loaded_total The total number of classes that have been loaded since the JVM has started execution +# TYPE jvm_classes_loaded_total counter +jvm_classes_loaded_total 19802.0 +# HELP jvm_classes_unloaded_total The total number of classes that have been unloaded since the JVM has started execution +# TYPE jvm_classes_unloaded_total counter +jvm_classes_unloaded_total 0.0 +# HELP datomic_index_writes_msec Time per index segment write. +# TYPE datomic_index_writes_msec gauge +datomic_index_writes_msec 1246.0 +# HELP datomic_alarms_other Number of alarms that are not related to any other specific alarm metric. +# TYPE datomic_alarms_other gauge +datomic_alarms_other 0.0 +# HELP datomic_storage_write_msec Time spent writing to storage. 
+# TYPE datomic_storage_write_msec gauge +datomic_storage_write_msec 315.0 +# HELP datomic_storage_read_bytes_total Total number of bytes read from storage. +# TYPE datomic_storage_read_bytes_total counter +datomic_storage_read_bytes_total 7814735.0 +# HELP datomic_garbage_segments Number of garbage segments created. +# TYPE datomic_garbage_segments gauge +datomic_garbage_segments 2095.0 +# HELP jvm_gc_collection_seconds Time spent in a given JVM garbage collector in seconds. +# TYPE jvm_gc_collection_seconds summary +jvm_gc_collection_seconds_count{gc="PS Scavenge",} 196.0 +jvm_gc_collection_seconds_sum{gc="PS Scavenge",} 6.501 +jvm_gc_collection_seconds_count{gc="PS MarkSweep",} 4.0 +jvm_gc_collection_seconds_sum{gc="PS MarkSweep",} 0.34 +# HELP datomic_memory_index_consumed_megabytes RAM consumed by memory index in MB. +# TYPE datomic_memory_index_consumed_megabytes gauge +datomic_memory_index_consumed_megabytes 12.0 +# HELP datomic_datoms Number of unique datoms in the index. +# TYPE datomic_datoms gauge +datomic_datoms 5.8511225E7 +# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. +# TYPE process_cpu_seconds_total counter +process_cpu_seconds_total 608.24 +# HELP process_start_time_seconds Start time of the process since unix epoch in seconds. +# TYPE process_start_time_seconds gauge +process_start_time_seconds 1.571158208795E9 +# HELP process_open_fds Number of open file descriptors. +# TYPE process_open_fds gauge +process_open_fds 532.0 +# HELP process_max_fds Maximum number of open file descriptors. +# TYPE process_max_fds gauge +process_max_fds 1048576.0 +# HELP process_virtual_memory_bytes Virtual memory size in bytes. +# TYPE process_virtual_memory_bytes gauge +process_virtual_memory_bytes 1.0522513408E10 +# HELP process_resident_memory_bytes Resident memory size in bytes. 
+# TYPE process_resident_memory_bytes gauge
+process_resident_memory_bytes 3.559723008E9
+# HELP datomic_alarms Number of alarms/problems that have occurred.
+# TYPE datomic_alarms gauge
+datomic_alarms 0.0
+# HELP datomic_alarms_unhandled_exception Number of alarms related to unhandled exceptions.
+# TYPE datomic_alarms_unhandled_exception gauge
+datomic_alarms_unhandled_exception 0.0
+# HELP datomic_storage_read_operations_total Total number of storage read operations.
+# TYPE datomic_storage_read_operations_total counter
+datomic_storage_read_operations_total 2027.0
+
+```
+
+## Troubleshooting
+
+__Problem:__ The transactor refuses to start because there is an error related to netty (some methods cannot be found).
+
+__Solution:__ Depending on the Datomic version in use the netty version that comes with it may be too old missing methods required by this project. Resolve this issue by replacing the netty-all*.jar in Datomic's `/lib` directory with a newer one. _This is also covered by the docker example in the __examples__ section of the repository._
+
diff --git a/examples/docker/Dockerfile b/examples/docker/Dockerfile
new file mode 100644
index 0000000..fc346e6
--- /dev/null
+++ b/examples/docker/Dockerfile
@@ -0,0 +1,36 @@
+FROM openjdk:8u222-jre
+
+ENV DATOMIC_VERSION 0.9.5966
+
+ARG DATOMIC_ACC_USER
+ARG DATOMIC_ACC_PASS
+
+RUN wget -q --http-user=${DATOMIC_ACC_USER} --http-password=${DATOMIC_ACC_PASS} https://my.datomic.com/repo/com/datomic/datomic-pro/$DATOMIC_VERSION/datomic-pro-$DATOMIC_VERSION.zip -O datomic-pro-$DATOMIC_VERSION.zip
+RUN unzip -q /datomic-pro-${DATOMIC_VERSION}.zip
+RUN rm /datomic-pro-${DATOMIC_VERSION}.zip
+RUN mv /datomic-pro-${DATOMIC_VERSION} /datomic
+
+ENV DATOMIC_TX_METRICS_VERSION 0.1.0-alpha
+
+ADD https://github.com/life-research/datomic-tx-metrics/releases/download/v${DATOMIC_TX_METRICS_VERSION}/datomic-tx-metrics-${DATOMIC_TX_METRICS_VERSION}-standalone.jar /datomic/lib/
+ADD /transactor.properties /datomic/
+ADD /logback.xml /datomic/bin/logback.xml
+ADD /start.sh /datomic/start
+RUN chmod +x /datomic/start
+
+VOLUME /datomic/log
+
+# Replacing the netty version that comes with Datomic may be necessary if it's
+# too old. Otherwise the metric collector's server won't start because of
+# missing functions which eventually leads to the transactor not being able to
+# finish startup. Note: find's -name takes a shell glob, not a regex.
+RUN find /datomic/lib -name 'netty-all-*.jar' -delete
+ADD https://repo1.maven.org/maven2/io/netty/netty-all/4.1.42.Final/netty-all-4.1.42.Final.jar /datomic/lib
+
+EXPOSE 4334
+EXPOSE 8080
+
+ADD /datomic-tx-metrics.jar /datomic/lib/
+
+WORKDIR /datomic
+CMD ["./start"]
diff --git a/examples/docker/README.md b/examples/docker/README.md
new file mode 100644
index 0000000..c2bc054
--- /dev/null
+++ b/examples/docker/README.md
@@ -0,0 +1,56 @@
+# Docker Example
+
+A simple example of a containerized Datomic transactor with a registered metrics collector backed by a cassandra database.
+
+## Prerequisites
+
+* you have to have a Datomic account associated with a valid Pro license
+* your license key has to be pasted into the `transactor.properties` file under the `license-key` configuration entry before build
+* cassandra database is required
+
+## Build the container image
+
+Run the following command in order to build the container image:
+
+```
+docker build --build-arg DATOMIC_ACC_USER=<datomic-user> --build-arg DATOMIC_ACC_PASS=<datomic-password> -t datomic-tx-metrics:<tag> .
+```
+
+## Run the container
+
+The example assumes that the transactor can use an existing cassandra keyspace and table. **At this point the keyspace has to be `datomic`** _(can be changed at will by adjusting the `start.sh` and/or `transactor.properties` file)._
+
+In order to successfully start the transactor the cassandra database to be used has to be configured beforehand. Tools needed to do so can be obtained from the official [Cassandra Download Page](http://cassandra.apache.org/download/) (they're part of the archive in the `bin` directory).
+ +If not already done you can do so by connecting to the database using `cqlsh`: +``` +cqlsh +``` + +Next create the `datomic` keyspace: +``` +CREATE KEYSPACE IF NOT EXISTS datomic WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor': <replication-factor> }; +``` + +Subsequently create the `datomic` table within the previously created keyspace: +``` +CREATE TABLE IF NOT EXISTS datomic.datomic +( + id text PRIMARY KEY, + rev bigint, + map text, + val blob +); +``` + +Finally run the following command to start a container using the built container image: +``` +docker run --name "datomic-transactor" --rm -d \ + -e CASSANDRA_HOST=<cassandra-host> \ + -e CASSANDRA_TABLE=<cassandra-table> \ + -e METRICS_PORT=<metrics-port> \ + -p <host-port>:<metrics-port> \ + datomic-tx-metrics:<tag> +``` + +**Note:** _If you are using the default port (11509) of the metrics collector's web server and not using the `METRICS_PORT` environment variable make sure to still expose that port since it's not exposed by default. Otherwise you won't be able to scrape it._ diff --git a/examples/docker/logback.xml b/examples/docker/logback.xml new file mode 100644 index 0000000..d078e12 --- /dev/null +++ b/examples/docker/logback.xml @@ -0,0 +1,57 @@ + + + + + true + + + + + %-5level [%thread] %logger{0}: %msg%n + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/examples/docker/start.sh b/examples/docker/start.sh new file mode 100644 index 0000000..ed8946d --- /dev/null +++ b/examples/docker/start.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +sed -i "/host=0.0.0.0/a alt-host=${ALT_HOST:-127.0.0.1}" transactor.properties +sed -i "s/port=4334/port=${PORT:-4334}/" transactor.properties + +## Cassandra +sed -i "s/cassandra-host=/cassandra-host=${CASSANDRA_HOST}/" transactor.properties +sed -i "s/cassandra-table=datomic.datomic/cassandra-table=datomic.${CASSANDRA_TABLE:-datomic}/" transactor.properties + +## Memory Settings +sed -i "s/memory-index-threshold=32m/memory-index-threshold=${MEMORY_INDEX_THRESHOLD:-32m}/" 
transactor.properties +sed -i "s/memory-index-max=512m/memory-index-max=${MEMORY_INDEX_MAX:-512m}/" transactor.properties +sed -i "s/object-cache-max=1g/object-cache-max=${OBJECT_CACHE_MAX:-1g}/" transactor.properties +sed -i "s/write-concurrency=4/write-concurrency=${WRITE_CONCURRENCY:-4}/" transactor.properties +sed -i "s/read-concurrency=8/read-concurrency=${READ_CONCURRENCY:-8}/" transactor.properties + +## Logging +sed -i "s/root level=\"INFO\"/root level=\"${LOG_LEVEL_ROOT:-INFO}\"/" bin/logback.xml + +# Start transactor +exec bin/transactor "-Xms${XMX:-4g}" "-Xmx${XMX:-4g}" transactor.properties diff --git a/examples/docker/transactor.properties b/examples/docker/transactor.properties new file mode 100644 index 0000000..c6ac556 --- /dev/null +++ b/examples/docker/transactor.properties @@ -0,0 +1,127 @@ +################################################################### + +protocol=cass +host=0.0.0.0 +port=4334 + +################################################################### +# See https://docs.datomic.com/on-prem/storage.html + +license-key= + + + +################################################################### +# See https://docs.datomic.com/on-prem/storage.html + +cassandra-table=datomic.datomic +cassandra-host= +cassandra-port=9042 + + + +################################################################### +# See https://docs.datomic.com/on-prem/capacity.html + + +## Recommended settings for -Xmx4g production usage. +memory-index-threshold=32m +memory-index-max=512m +object-cache-max=1g + +## Recommended settings for -Xmx1g usage, e.g. dev laptops. +#memory-index-threshold=32m +#memory-index-max=256m +#object-cache-max=128m + + + +## OPTIONAL ####################################################### + + +## Set to false to disable SSL between the peers and the transactor. +# Default: true +# encrypt-channel=true + +## Data directory is used for dev: and free: storage, and +## as a temporary directory for all storages. 
+# data-dir=data + +## Transactor will log here, see bin/logback.xml to configure logging. +# log-dir=log + +## Transactor will write process pid here on startup +# pid-file=transactor.pid + + + +## OPTIONAL ####################################################### +# See https://docs.datomic.com/on-prem/storage.html +## Memcached configuration. + +# memcached=host:port,host:port,... +# memcached-username=datomic +# memcached-password=datomic + + + +## OPTIONAL ####################################################### +# See https://docs.datomic.com/on-prem/capacity.html + + +## Soft limit on the number of concurrent writes to storage. +# Default: 4, Minimum: 2 +write-concurrency=4 + +## Soft limit on the number of concurrent reads to storage. +# Default: 2 times write-concurrency, Minimum: 2 +read-concurrency=8 + + + +## OPTIONAL ####################################################### +# See https://docs.datomic.com/on-prem/aws.html +## Optional settings for rotating logs to S3 +# (Can be auto-generated by bin/datomic ensure-transactor.) + +# aws-s3-log-bucket-id= + + + +## OPTIONAL ####################################################### +# See https://docs.datomic.com/on-prem/aws.html +## Optional settings for Cloudwatch metrics. +# (Can be auto-generated by bin/datomic ensure-transactor.) + +# aws-cloudwatch-region= + +## Pick a unique name to distinguish transactor metrics from different systems. +# aws-cloudwatch-dimension-value=your-system-name + +metrics-callback=datomic-tx-metrics.core/tx-metrics-callback-handler + + + +## OPTIONAL ####################################################### +# See https://docs.datomic.com/on-prem/ha.html + + +## The transactor will write a heartbeat into storage on this interval. +## A standby transactor will take over if it sees the heartbeat go +## unwritten for 2x this interval.
If your transactor load leads to +## long gc pauses, you can increase this number to prevent the standby +## transactor from unnecessarily taking over during a long gc pause. +# Default: 5000, Minimum: 5000 +# heartbeat-interval-msec=5000 + + + +## OPTIONAL ####################################################### + + +## The transactor will use this partition for new entities that +## do not explicitly specify a partition. +# Default: :db.part/user +# default-partition=:db.part/user + +