Skip to content

Commit 272ba7d

Browse files
authored
Fix: TTL query add metadata TTL and PersistentWorker used wrong TTL for metrics cache if the storage is BanyanDB. (#13827)
1 parent 097fe8b commit 272ba7d

20 files changed

Lines changed: 199 additions & 19 deletions

File tree

.github/workflows/skywalking.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,8 +390,9 @@ jobs:
390390
- name: Storage OpenSearch 3.0.0
391391
config: test/e2e-v2/cases/storage/opensearch/e2e.yaml
392392
env: OPENSEARCH_VERSION=3.0.0
393-
- name: Storage ES Sharding
393+
- name: Storage ES Sharding 8.18.8
394394
config: test/e2e-v2/cases/storage/es/es-sharding/e2e.yaml
395+
env: ES_VERSION=8.18.8
395396

396397
- name: Alarm ES
397398
config: test/e2e-v2/cases/alarm/es/e2e.yaml

docs/en/changes/changes.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
* LAL: add `layer: auto` mode for dynamic layer assignment when `service.layer` is absent.
3030
* Add two-phase `SpanListener` SPI mechanism for extensible trace span processing. Refactor GenAI from hardcoded `SpanForward.processGenAILogic()` to `GenAISpanListener`.
3131
* Add OTLP/HTTP receiver support for traces, logs, and metrics (`/v1/traces`, `/v1/logs`, `/v1/metrics`). Supports both `application/x-protobuf` and `application/json` content types.
32+
* Fix: TTL query add metadata TTL.
33+
* Fix: PersistentWorker used wrong TTL for metrics cache if the storage is BanyanDB.
3234

3335
#### UI
3436

oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/CoreModuleProvider.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,6 @@ TTLStatusQuery.class, new TTLStatusQuery(
394394
final MetricsStreamProcessor metricsStreamProcessor = MetricsStreamProcessor.getInstance();
395395
metricsStreamProcessor.setL1FlushPeriod(moduleConfig.getL1FlushPeriod());
396396
metricsStreamProcessor.setStorageSessionTimeout(moduleConfig.getStorageSessionTimeout());
397-
metricsStreamProcessor.setMetricsDataTTL(moduleConfig.getMetricsDataTTL());
398397
RecordStreamProcessor.getInstance().setRecordDataTTL(moduleConfig.getRecordDataTTL());
399398
TopNStreamProcessor.getInstance().setTopNWorkerReportCycle(moduleConfig.getTopNReportPeriod());
400399
apdexThresholdConfig = new ApdexThresholdConfig(this);

oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/worker/MetricsStreamProcessor.java

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.apache.skywalking.oap.server.core.analysis.StreamProcessor;
3030
import org.apache.skywalking.oap.server.core.analysis.metrics.Metrics;
3131
import org.apache.skywalking.oap.server.core.config.DownSamplingConfigService;
32+
import org.apache.skywalking.oap.server.core.query.TTLStatusQuery;
3233
import org.apache.skywalking.oap.server.core.storage.IMetricsDAO;
3334
import org.apache.skywalking.oap.server.core.storage.StorageBuilderFactory;
3435
import org.apache.skywalking.oap.server.core.storage.StorageDAO;
@@ -82,11 +83,6 @@ public class MetricsStreamProcessor implements StreamProcessor<Metrics> {
8283
*/
8384
@Setter
8485
private long storageSessionTimeout = 70_000;
85-
/**
86-
* @since 8.7.0 TTL settings from {@link org.apache.skywalking.oap.server.core.CoreModuleConfig#getMetricsDataTTL()}
87-
*/
88-
@Setter
89-
private int metricsDataTTL = 3;
9086

9187
public static MetricsStreamProcessor getInstance() {
9288
return PROCESSOR;
@@ -145,6 +141,9 @@ private void create(ModuleDefineHolder moduleDefineHolder,
145141
DownSamplingConfigService configService = moduleDefineHolder.find(CoreModule.NAME)
146142
.provider()
147143
.getService(DownSamplingConfigService.class);
144+
TTLStatusQuery ttlStatusQuery = moduleDefineHolder.find(CoreModule.NAME)
145+
.provider()
146+
.getService(TTLStatusQuery.class);
148147

149148
MetricsPersistentWorker hourPersistentWorker = null;
150149
MetricsPersistentWorker dayPersistentWorker = null;
@@ -168,13 +167,15 @@ private void create(ModuleDefineHolder moduleDefineHolder,
168167
Model model = modelSetter.add(
169168
metricsClass, stream.getScopeId(), new Storage(stream.getName(), timeRelativeID, DownSampling.Hour)
170169
);
171-
hourPersistentWorker = downSamplingWorker(moduleDefineHolder, metricsDAO, model, supportUpdate, kind);
170+
int hourTTL = ttlStatusQuery.getMetricsTTL(model);
171+
hourPersistentWorker = downSamplingWorker(moduleDefineHolder, metricsDAO, model, supportUpdate, kind, hourTTL);
172172
}
173173
if (configService.shouldToDay()) {
174174
Model model = modelSetter.add(
175175
metricsClass, stream.getScopeId(), new Storage(stream.getName(), timeRelativeID, DownSampling.Day)
176176
);
177-
dayPersistentWorker = downSamplingWorker(moduleDefineHolder, metricsDAO, model, supportUpdate, kind);
177+
int dayTTL = ttlStatusQuery.getMetricsTTL(model);
178+
dayPersistentWorker = downSamplingWorker(moduleDefineHolder, metricsDAO, model, supportUpdate, kind, dayTTL);
178179
}
179180

180181
transWorker = new MetricsTransWorker(
@@ -184,8 +185,9 @@ private void create(ModuleDefineHolder moduleDefineHolder,
184185
Model model = modelSetter.add(
185186
metricsClass, stream.getScopeId(), new Storage(stream.getName(), timeRelativeID, DownSampling.Minute)
186187
);
188+
int minuteTTL = ttlStatusQuery.getMetricsTTL(model);
187189
MetricsPersistentWorker minutePersistentWorker = minutePersistentWorker(
188-
moduleDefineHolder, metricsDAO, model, transWorker, supportUpdate, kind, metricsClass);
190+
moduleDefineHolder, metricsDAO, model, transWorker, supportUpdate, kind, metricsClass, minuteTTL);
189191

190192
String remoteReceiverWorkerName = stream.getName() + "_rec";
191193
IWorkerInstanceSetter workerInstanceSetter = moduleDefineHolder.find(CoreModule.NAME)
@@ -205,7 +207,8 @@ private MetricsPersistentWorker minutePersistentWorker(ModuleDefineHolder module
205207
MetricsTransWorker transWorker,
206208
boolean supportUpdate,
207209
MetricStreamKind kind,
208-
Class<? extends Metrics> metricsClass) {
210+
Class<? extends Metrics> metricsClass,
211+
int metricsDataTTL) {
209212
AlarmNotifyWorker alarmNotifyWorker = new AlarmNotifyWorker(moduleDefineHolder);
210213
ExportMetricsWorker exportWorker = new ExportMetricsWorker(moduleDefineHolder);
211214

@@ -222,7 +225,8 @@ private MetricsPersistentWorker downSamplingWorker(ModuleDefineHolder moduleDefi
222225
IMetricsDAO metricsDAO,
223226
Model model,
224227
boolean supportUpdate,
225-
MetricStreamKind kind) {
228+
MetricStreamKind kind,
229+
int metricsDataTTL) {
226230
MetricsPersistentWorker persistentWorker = new MetricsPersistentWorker(
227231
moduleDefineHolder, model, metricsDAO,
228232
supportUpdate, storageSessionTimeout, metricsDataTTL, kind

oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/query/TTLStatusQuery.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import lombok.RequiredArgsConstructor;
2222
import org.apache.skywalking.oap.server.core.storage.StorageModule;
23+
import org.apache.skywalking.oap.server.core.storage.model.Model;
2324
import org.apache.skywalking.oap.server.core.storage.ttl.MetricsTTL;
2425
import org.apache.skywalking.oap.server.core.storage.ttl.RecordsTTL;
2526
import org.apache.skywalking.oap.server.core.storage.ttl.StorageTTLStatusQuery;
@@ -51,10 +52,27 @@ public TTLDefinition getTTL() {
5152
TTLDefinition ttlDefinition = getStorageTTLStatusQuery().getTTL();
5253
if (ttlDefinition == null) {
5354
ttlDefinition = new TTLDefinition(
54-
new MetricsTTL(coreMetricsDataTTL, coreMetricsDataTTL, coreMetricsDataTTL),
55+
new MetricsTTL(coreMetricsDataTTL, coreMetricsDataTTL, coreMetricsDataTTL, coreMetricsDataTTL),
5556
new RecordsTTL(coreRecordDataTTL, coreRecordDataTTL, coreRecordDataTTL, coreRecordDataTTL, coreRecordDataTTL)
5657
);
5758
}
5859
return ttlDefinition;
5960
}
61+
62+
/**
63+
* Get the effective TTL (in days) for a specific metrics model.
64+
* The returned value should include both hot and warm stage TTL,
65+
* representing the total period during which data is accessible.
66+
*
67+
* @param model the metrics model
68+
* @return TTL in days, or -1 if the storage does not customize per-model TTL
69+
* (consumer falls back to core metricsDataTTL)
70+
*/
71+
public int getMetricsTTL(Model model) {
72+
int ttl = getStorageTTLStatusQuery().getMetricsTTL(model);
73+
if (ttl < 0) {
74+
ttl = coreMetricsDataTTL;
75+
}
76+
return ttl;
77+
}
6078
}

oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/storage/ttl/MetricsTTL.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
*/
3131
@Data
3232
public class MetricsTTL {
33+
private final int metadata;
3334
private final int minute;
3435
private final int hour;
3536
private final int day;

oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/storage/ttl/StorageTTLStatusQuery.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.skywalking.oap.server.core.storage.ttl;
2020

21+
import org.apache.skywalking.oap.server.core.storage.model.Model;
2122
import org.apache.skywalking.oap.server.library.module.Service;
2223

2324
public interface StorageTTLStatusQuery extends Service {
@@ -30,4 +31,17 @@ public interface StorageTTLStatusQuery extends Service {
3031
default TTLDefinition getTTL() {
3132
return null;
3233
}
34+
35+
/**
36+
* Get the effective TTL (in days) for a specific metrics model.
37+
* The returned value should include both hot and warm stage TTL,
38+
* representing the total period during which data is accessible.
39+
*
40+
* @param model the metrics model
41+
* @return TTL in days, or -1 if the storage does not customize per-model TTL
42+
* (consumer falls back to core metricsDataTTL)
43+
*/
44+
default int getMetricsTTL(Model model) {
45+
return -1;
46+
}
3347
}

oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/storage/ttl/TTLDefinition.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ public String toString() {
3939
ttlDefinition.append("#\n");
4040
ttlDefinition.append("# TTLs for each granularity metrics are listed separately.\n");
4141
ttlDefinition.append("#\n");
42+
ttlDefinition.append("metrics.metadata=").append(metrics.getMetadata()).append("\n");
4243
ttlDefinition.append("# Cover hot and warm data for BanyanDB.\n");
4344
ttlDefinition.append("metrics.minute=").append(metrics.getMinute()).append("\n");
4445
ttlDefinition.append("metrics.hour=").append(metrics.getHour()).append("\n");

oap-server/server-storage-plugin/storage-banyandb-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/banyandb/BanyanDBTTLStatusQuery.java

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818

1919
package org.apache.skywalking.oap.server.storage.plugin.banyandb;
2020

21+
import org.apache.skywalking.oap.server.core.UnexpectedException;
22+
import org.apache.skywalking.oap.server.core.storage.model.Model;
2123
import org.apache.skywalking.oap.server.core.storage.ttl.MetricsTTL;
2224
import org.apache.skywalking.oap.server.core.storage.ttl.RecordsTTL;
2325
import org.apache.skywalking.oap.server.core.storage.ttl.StorageTTLStatusQuery;
@@ -42,6 +44,7 @@ public class BanyanDBTTLStatusQuery implements StorageTTLStatusQuery {
4244
private int gmColdMinuteTTLDays = -1;
4345
private int gmColdHourTTLDays = -1;
4446
private int gmColdDayTTLDays = -1;
47+
private int gmMetadataTTLDays = -1;
4548

4649
public BanyanDBTTLStatusQuery(BanyanDBStorageConfig config) {
4750
grNormalTTLDays = config.getRecordsNormal().getTtl();
@@ -52,6 +55,8 @@ public BanyanDBTTLStatusQuery(BanyanDBStorageConfig config) {
5255
gmMinuteTTLDays = config.getMetricsMin().getTtl();
5356
gmHourTTLDays = config.getMetricsHour().getTtl();
5457
gmDayTTLDays = config.getMetricsDay().getTtl();
58+
gmMetadataTTLDays = config.getMetadata().getTtl();
59+
5560
config.getRecordsNormal().getAdditionalLifecycleStages().forEach(stage -> {
5661
if (stage.getName().equals(BanyanDBStorageConfig.StageName.warm)) {
5762
grNormalTTLDays = grNormalTTLDays + stage.getTtl();
@@ -113,7 +118,7 @@ public BanyanDBTTLStatusQuery(BanyanDBStorageConfig config) {
113118
@Override
114119
public TTLDefinition getTTL() {
115120
TTLDefinition definition = new TTLDefinition(
116-
new MetricsTTL(gmMinuteTTLDays, gmHourTTLDays, gmDayTTLDays),
121+
new MetricsTTL(gmMetadataTTLDays, gmMinuteTTLDays, gmHourTTLDays, gmDayTTLDays),
117122
new RecordsTTL(grNormalTTLDays, grTraceTTLDays, grZipkinTraceTTLDays, grLogTTLDays, grBrowserErrorLogTTLDays)
118123
);
119124
definition.getRecords().setColdNormal(grColdNormalTTLDays);
@@ -126,4 +131,21 @@ public TTLDefinition getTTL() {
126131
definition.getMetrics().setColdDay(gmColdDayTTLDays);
127132
return definition;
128133
}
134+
135+
@Override
136+
public int getMetricsTTL(Model model) {
137+
if (model.getBanyanDBModelExtension().isIndexMode()) {
138+
return gmMetadataTTLDays;
139+
}
140+
switch (model.getDownsampling()) {
141+
case Hour:
142+
return gmHourTTLDays;
143+
case Day:
144+
return gmDayTTLDays;
145+
case Minute:
146+
return gmMinuteTTLDays;
147+
default:
148+
throw new UnexpectedException("Unexpected downsampling " + model.getDownsampling());
149+
}
150+
}
129151
}

0 commit comments

Comments
 (0)