diff --git a/server/finders/dbfinder/census.go b/server/finders/dbfinder/census.go index 27b123f0..202cfd78 100644 --- a/server/finders/dbfinder/census.go +++ b/server/finders/dbfinder/census.go @@ -78,10 +78,14 @@ func (f *Finder) CensusGeographiesByEntityIDs(ctx context.Context, limit *int, w } } - // Process stops in 1 batch, others one-by-one (and set MatchEntityID for grouping later) var entityGeogs []*model.CensusGeography fields := getCensusGeographySelectFields(ctx) if entityType == "stop" { + // One batched query for every requested stop. The select keeps + // per-stop attribution (match_entity_id = stop.id) so arrangeGroup + // below can bucket results back to the right stop without needing a + // loop-then-tag pass. + fields.perStopAttribution = true var ents []*model.CensusGeography pw := forStopids(entityIds) if err := dbutil.Select(ctx, f.db, censusDatasetGeographySelect(limit, pw, fields), &ents); err != nil { @@ -89,6 +93,10 @@ func (f *Finder) CensusGeographiesByEntityIDs(ctx context.Context, limit *int, w } entityGeogs = append(entityGeogs, ents...) } else { + // Routes / agencies: the union over the entity's stop set is what we + // want (avoids double-counting tracts hit by multiple stops). One + // query per entity; tag MatchEntityID after scan since the SQL emits + // 0 for the unioned buffer. for _, entityId := range entityIds { stopIds, err := getBufferStopIds(ctx, f.db, entityType, entityId) if err != nil { @@ -326,6 +334,12 @@ type censusGeographySelectFields struct { intersectionGeometry bool geometryArea bool geometry bool + // Caller flag (not GraphQL-driven): when true, the buffer CTE for the + // `stop_buffer` filter emits one row per stop with match_entity_id = + // gtfs_stops.id instead of unioning all stops into a single polygon. + // Used by CensusGeographiesByEntityIDs for entityType == "stop" so a + // single batched query can be grouped back per requesting stop. + perStopAttribution bool } func getCensusGeographySelectFields(ctx context.Context) censusGeographySelectFields { @@ -425,8 +439,21 @@ func censusDatasetGeographySelect(limit *int, where *model.CensusDatasetGeograph Column("gtfs_stops.id as match_entity_id"). From("gtfs_stops"). Where(In("gtfs_stops.id", loc.StopBuffer.StopIds)) + } else if fields.perStopAttribution { + // One buffer per stop, attribution preserved as + // match_entity_id. Tract rows are duplicated when a tract + // intersects multiple stops in the input set; that's the + // intent — callers want per-stop apportionment. + qBufferUse = true + qBuffer = sq.StatementBuilder.Select(). + Column("gtfs_stops.id as match_entity_id"). + Column("ST_Buffer(gtfs_stops.geometry::geography, ?)::geometry as buffer", radius). + From("gtfs_stops"). + Where(In("gtfs_stops.id", loc.StopBuffer.StopIds)) } else { - // Add this as a pre-CTE + // Default: union over the input stop set, one polygon. Used + // by routes/agencies (which want the union over their stops) + // and by top-level aggregation queries. qBufferUse = true qBufferOuter := sq.StatementBuilder.Select(). Column("ST_Union(ST_Buffer(gtfs_stops.geometry::geography, ?)::geometry) as buffer", radius). @@ -457,6 +484,9 @@ func censusDatasetGeographySelect(limit *int, where *model.CensusDatasetGeograph if fields.intersectionGeometry { q = q.Column("ST_Intersection(tlcg.geometry, buffer.buffer) as intersection_geometry") } + if fields.perStopAttribution { + q = q.Column("buffer.match_entity_id") + } } if qPointsUse { q = q.WithCTE(sq.CTE{ diff --git a/server/gql/stop_resolver_test.go b/server/gql/stop_resolver_test.go index e39f5755..4730ee41 100644 --- a/server/gql/stop_resolver_test.go +++ b/server/gql/stop_resolver_test.go @@ -378,7 +378,7 @@ func stopResolverTestcases(t testing.TB, cfg model.Config) []testcase { // }, // TODO: parent, children; test data has no stations. // TODO: level, pathways_from_stop, pathways_to_stop: test data has no pathways... - // TODO: census_geographies + // census_geographies: see per-stop buffer cases below // stop_times { name: "stop_times", @@ -552,7 +552,41 @@ func stopResolverTestcases(t testing.TB, cfg model.Config) []testcase { selector: "stops.#.stop_id", selectExpect: []string{}, }, - // TODO: census_geographies + // census_geographies (per-stop buffer attribution) + { + name: "census_geographies tract by stop buffer", + query: `query{stops(where:{feed_onestop_id:"BA", stop_id:"FTVL"}) { stop_id census_geographies(where:{layer:"tract", radius:100.0}) { geoid } } }`, + selector: "stops.0.census_geographies.#.geoid", + selectExpect: []string{"1400000US06001406100"}, + }, + { + name: "census_geographies county by stop buffer", + query: `query{stops(where:{feed_onestop_id:"BA", stop_id:"MCAR"}) { stop_id census_geographies(where:{layer:"county", radius:1000.0}) { geoid } } }`, + selector: "stops.0.census_geographies.#.geoid", + selectExpect: []string{"0500000US06001"}, + }, + { + // Two stops resolved in one request batch into a single + // CensusGeographiesByEntityIDs call; each must get its own + // geographies (including the tracts shared by both buffers). + // Before per-stop attribution every row carried match_entity_id=0 + // and both stops resolved to empty lists. + name: "census_geographies per-stop attribution across batch", + query: `query{ + s12: stops(where:{feed_onestop_id:"BA", stop_id:"12TH"}) { stop_id census_geographies(where:{layer:"tract", radius:300.0}) { geoid } } + s19: stops(where:{feed_onestop_id:"BA", stop_id:"19TH"}) { stop_id census_geographies(where:{layer:"tract", radius:300.0}) { geoid } } + }`, + sel: []testcaseSelector{ + { + selector: "s12.0.census_geographies.#.geoid", + expect: []string{"1400000US06001402801", "1400000US06001402802", "1400000US06001402900", "1400000US06001403000", "1400000US06001403100"}, + }, + { + selector: "s19.0.census_geographies.#.geoid", + expect: []string{"1400000US06001402801", "1400000US06001402900"}, + }, + }, + }, // TODO: route_stop_buffer } return testcases