99from typing import Any
1010
1111from clp_py_utils .clp_config import QUERY_TASKS_TABLE_NAME
12+ from clp_py_utils .clp_metadata_db_utils import get_archives_table_name
1213from clp_py_utils .sql_adapter import SqlAdapter
14+ from opentelemetry .api .metrics import get_meter
1315
1416from job_orchestration .scheduler .scheduler_data import QueryTaskResult , QueryTaskStatus
1517
# Query-worker telemetry instruments.
#
# Both counters are built once, when this module is imported. With telemetry
# disabled the meter is a no-op, so ``add()`` calls on these counters are
# accepted and silently discarded.
_query_meter = get_meter("query-worker")

# Uncompressed size of the log data scanned by queries, in bytes.
_bytes_scanned_counter = _query_meter.create_counter(
    "clp.query.bytes_scanned_total",
    unit="By",
    description="Total bytes of uncompressed log data scanned during queries",
)

# Size of the results produced by queries, in bytes.
_bytes_output_counter = _query_meter.create_counter(
    "clp.query.bytes_output_total",
    unit="By",
    description="Total bytes of query results output",
)
32+
1633
1734def get_task_log_file_path (clp_logs_dir : Path , job_id : str , task_id : int ) -> Path :
1835 worker_logs_dir = clp_logs_dir / job_id
@@ -49,7 +66,11 @@ def run_query_task(
4966 job_id : str ,
5067 task_id : int ,
5168 start_time : datetime .datetime ,
52- ) -> tuple [QueryTaskResult , str ]:
69+ ) -> tuple [QueryTaskResult , str , int ]:
70+ """Run a query subprocess and return the result, stdout string, and stdout byte count.
71+
72+ :return: Tuple of (task_result, stdout_str, stdout_byte_count).
73+ """
5374 clo_log_path = get_task_log_file_path (clp_logs_dir , job_id , task_id )
5475 clo_log_file = open (clo_log_path , "w" )
5576
@@ -113,7 +134,7 @@ def sigterm_handler(_signo, _stack_frame):
113134 if QueryTaskStatus .FAILED == task_status :
114135 task_result .error_log_path = str (clo_log_path )
115136
116- return task_result , stdout_data .decode ("utf-8" )
137+ return task_result , stdout_data .decode ("utf-8" ), len ( stdout_data )
117138
118139
119140def update_query_task_metadata (
@@ -134,3 +155,32 @@ def update_query_task_metadata(
134155 WHERE id = { task_id }
135156 """
136157 db_cursor .execute (query )
158+
159+
def emit_bytes_scanned(
    sql_adapter: SqlAdapter,
    clp_metadata_db_conn_params: dict,
    archive_id: str,
    dataset: str | None,
    logger: Logger,
) -> None:
    """Add an archive's uncompressed size to ``clp.query.bytes_scanned_total``.

    Looks up ``uncompressed_size`` for ``archive_id`` in the archives table of
    the metadata database and adds it to the module-level bytes-scanned
    counter. Telemetry is best-effort: every failure is logged through
    ``logger`` and never propagated to the caller.

    Nothing is emitted when the archive row does not exist, when its
    ``uncompressed_size`` is NULL, or when the stored value is negative
    (a counter only accepts non-negative increments).

    :param sql_adapter: Adapter used to open the metadata database connection.
    :param clp_metadata_db_conn_params: Metadata database connection
        parameters; only the ``table_prefix`` key is read here.
    :param archive_id: ID of the archive whose size should be recorded.
    :param dataset: Dataset forwarded to ``get_archives_table_name``; may be
        ``None``.
    :param logger: Logger that receives warnings and failure reports.
    """
    try:
        # Resolve the table name first so a bad config fails before a
        # connection is opened.
        table_name = get_archives_table_name(
            clp_metadata_db_conn_params["table_prefix"], dataset
        )

        with (
            closing(sql_adapter.create_connection(True)) as db_conn,
            closing(db_conn.cursor(dictionary=True)) as db_cursor,
        ):
            # A table name cannot be sent as a bound parameter, so it is
            # interpolated; the archive ID is always bound.
            db_cursor.execute(
                f"SELECT uncompressed_size FROM {table_name} WHERE id = %s",
                (archive_id,),
            )
            row = db_cursor.fetchone()

        # The connection and cursor are released at this point; only the
        # fetched row is needed to emit the metric.
        if row is None:
            return

        uncompressed_size = row["uncompressed_size"]
        if uncompressed_size is None:
            logger.warning(
                "Skipping bytes_scanned_total telemetry: archive %s has no uncompressed_size",
                archive_id,
            )
            return
        if uncompressed_size < 0:
            logger.warning(
                "Skipping bytes_scanned_total telemetry: archive %s has negative"
                " uncompressed_size %s",
                archive_id,
                uncompressed_size,
            )
            return

        _bytes_scanned_counter.add(uncompressed_size)
    except Exception:
        # Telemetry must never break query execution, so swallow after logging.
        logger.exception("Failed to emit bytes_scanned_total telemetry")
0 commit comments