@@ -52,16 +52,52 @@ def validate_changeset(changeset: Changeset, check_paths: bool = True) -> Valida
5252 if version not in (None , 1 ):
5353 report .errors .append (f"Unsupported schema_lens_version: { version } " )
5454
55- required = [
56- "baseline.solr_url" ,
57- "baseline.collection" ,
58- "data.docs_source.path" ,
59- "queries.source.path" ,
60- ]
55+ required = ["baseline.solr_url" , "baseline.collection" ]
6156 for key in required :
6257 if _get_in (raw , key ) in (None , "" ):
6358 report .errors .append (f"Missing required field: { key } " )
6459
60+ docs_source = _get_in (raw , "data.docs_source" ) or {}
61+ if not isinstance (docs_source , dict ):
62+ report .errors .append ("data.docs_source must be an object" )
63+ docs_source = {}
64+ docs_source_type = str (docs_source .get ("type" , "file" ))
65+ if docs_source_type not in {"file" , "solr" }:
66+ report .errors .append ("data.docs_source.type must be 'file' or 'solr'" )
67+ if docs_source_type == "file" :
68+ if not docs_source .get ("path" ):
69+ report .errors .append ("Missing required field: data.docs_source.path" )
70+ else :
71+ for key in ("solr_url" , "collection" ):
72+ if not docs_source .get (key ):
73+ report .errors .append (f"Missing required field: data.docs_source.{ key } " )
74+ mode = docs_source .get ("mode" )
75+ if mode and mode not in {"export" , "cursormark" }:
76+ report .errors .append ("data.docs_source.mode must be 'export' or 'cursormark'" )
77+
78+ query_source = _get_in (raw , "queries.source" ) or {}
79+ if not isinstance (query_source , dict ):
80+ report .errors .append ("queries.source must be an object" )
81+ query_source = {}
82+ query_source_type = str (query_source .get ("type" , "file" ))
83+ if query_source_type not in {"file" , "log" }:
84+ report .errors .append ("queries.source.type must be 'file' or 'log'" )
85+ if not query_source .get ("path" ):
86+ report .errors .append ("Missing required field: queries.source.path" )
87+
88+ if query_source_type == "log" :
89+ fmt = str (query_source .get ("format" , "solr_params" ))
90+ if fmt not in {"solr_params" , "jsonl" }:
91+ report .errors .append ("queries.source.format must be 'solr_params' or 'jsonl'" )
92+
93+ sampling_mode = _get_in (raw , "queries.sampling.mode" )
94+ if sampling_mode is not None and sampling_mode not in {"top" , "reservoir" }:
95+ report .errors .append ("queries.sampling.mode must be 'top' or 'reservoir'" )
96+
97+ preflight_fail = _get_in (raw , "preflight.fail_on_risk" )
98+ if preflight_fail is not None and not isinstance (preflight_fail , bool ):
99+ report .errors .append ("preflight.fail_on_risk must be boolean" )
100+
65101 changes = raw .get ("changes" , [])
66102 if not isinstance (changes , list ):
67103 report .errors .append ("changes must be a list" )
@@ -103,12 +139,11 @@ def validate_changeset(changeset: Changeset, check_paths: bool = True) -> Valida
103139 report .errors .append (f"{ loc } .set must be an object" )
104140
105141 if check_paths :
106- docs_path = _get_in (raw , "data.docs_source.path" )
142+ docs_path = _get_in (raw , "data.docs_source.path" ) if docs_source_type == "file" else None
107143 queries_path = _get_in (raw , "queries.source.path" )
108- path_entries = (
109- ("data.docs_source.path" , docs_path ),
110- ("queries.source.path" , queries_path ),
111- )
144+ path_entries = [("queries.source.path" , queries_path )]
145+ if docs_path is not None :
146+ path_entries .append (("data.docs_source.path" , docs_path ))
112147 for label , p in path_entries :
113148 if isinstance (p , str ):
114149 fp = _resolve_input_path (changeset .path , p )
0 commit comments