From 160c6e8d16a88a51e2c196a1dcf9b8577b7a781f Mon Sep 17 00:00:00 2001 From: hsteude Date: Mon, 4 May 2026 15:18:08 +0200 Subject: [PATCH 01/22] Add Feast feature store example with Redis CR and feature evolution workflow Reworked version of the Feast example: - Include redis-cr.yaml with namespace placeholder (no docs lookup needed) - Use feast[redis] pip dependency for Redis online store support - Replace MLflow section with a practical feature evolution workflow: define new computed features, register them, and query across views --- feast/README.md | 121 +++++++++ feast/feast-cr.yaml | 50 ++++ feast/feast_example.ipynb | 512 ++++++++++++++++++++++++++++++++++++++ feast/feature_store.yaml | 19 ++ feast/features.py | 50 ++++ feast/redis-cr.yaml | 37 +++ 6 files changed, 789 insertions(+) create mode 100644 feast/README.md create mode 100644 feast/feast-cr.yaml create mode 100644 feast/feast_example.ipynb create mode 100644 feast/feature_store.yaml create mode 100644 feast/features.py create mode 100644 feast/redis-cr.yaml diff --git a/feast/README.md b/feast/README.md new file mode 100644 index 0000000..85f2e8f --- /dev/null +++ b/feast/README.md @@ -0,0 +1,121 @@ +# Feast Feature Store Examples + +These examples demonstrate how to use [Feast](https://docs.feast.dev/) on prokube +for feature management in ML workflows. + +## Prerequisites + +- Feast must be enabled on your cluster (ask your admin) +- You have `kubectl` access to your Kubeflow profile namespace + +## Quick Start + +### 1. Deploy a Redis instance + +Create a password secret and a Redis CR in your namespace: + +```bash +# Generate a random password +kubectl create secret generic redis-feast \ + -n <your-namespace> \ + --from-literal=password=$(openssl rand -base64 24 | tr -d '/') + +# Deploy the Redis CR (edit namespace in redis-cr.yaml first) +kubectl apply -f redis-cr.yaml +kubectl get redis -n <your-namespace> -w +``` + +### 2. 
Create the Feast Redis secret + +```bash +NAMESPACE=<your-namespace> +PASSWORD=$(kubectl get secret redis-feast -n $NAMESPACE \ + -o jsonpath='{.data.password}' | base64 -d) + +cat > /tmp/redis-config.yaml << EOF +connection_string: "redis-feast.${NAMESPACE}.svc.cluster.local:6379,password=${PASSWORD}" +EOF + +kubectl create secret generic feast-redis-config \ + -n $NAMESPACE \ + --from-file=redis=/tmp/redis-config.yaml + +rm /tmp/redis-config.yaml +``` + +### 3. Deploy a FeatureStore + +Edit `feast-cr.yaml` to set your namespace, then: + +```bash +kubectl apply -f feast-cr.yaml +kubectl get featurestore -n <your-namespace> -w # wait until Ready +``` + +### 4. Run the notebook + +Open `feast_example.ipynb` in your Kubeflow notebook. The notebook reads the +`feast-redis-config` secret automatically and builds `feature_store.yaml` for you. + +## Files + +| File | What it is | +|------|------------| +| `redis-cr.yaml` | Kubernetes manifest — deploys a Redis instance (OpsTree operator) | +| `feast-cr.yaml` | Kubernetes manifest — deploys the FeatureStore CR | +| `feature_store.yaml` | Feast SDK config — tells the Python client where registry and stores are | +| `features.py` | Feature definitions — entities, data sources, feature views | +| `feast_example.ipynb` | End-to-end notebook: generate data, apply, define new features, materialize, serve | + +### Why two YAML files? + +`feast-cr.yaml` is a **Kubernetes resource** (`kind: FeatureStore`) that the operator +reads to provision PVCs and the Feast server pod. You apply it once with `kubectl`. + +`feature_store.yaml` is a **Feast SDK config file** (fixed filename — Feast convention) +that the Python client and CLI read to know how to connect to the registry and stores. +You use it in notebooks and scripts. + +## Architecture + +Feast has three stores. 
Here is what each one does and which backend prokube uses: + +| Store | Purpose | Prokube default | Alternatives | +|-------|---------|-----------------|--------------| +| **Registry** | Stores feature definitions (entities, feature views, sources). Written on `feast apply`, read at startup. | SQLite on PVC | SQL databases (PostgreSQL, etc.) for multi-replica or shared setups | +| **Online store** | Holds the *latest* feature value per entity. Read on every inference request — latency critical. | Redis (your `Redis` CR) | SQLite on PVC (dev/test only; not multi-replica safe) | +| **Offline store** | Historical feature records for point-in-time joins during training. Batch workload, not on serving path. | Parquet/file on PVC | Dask (same parquet files, distributed compute — use only if data exceeds pod memory); cloud warehouses (BigQuery, Snowflake, Redshift) | + +The offline store default is `type: file` (pandas). You can switch to `type: dask` in +`feast-cr.yaml` if your datasets are too large to fit in memory, but it adds complexity +and is rarely needed. + +``` + ┌─────────────────────────────────┐ + │ Your Namespace │ + │ │ + │ Redis CR (redis-feast) │ + │ - your private Redis instance │ + │ │ + feast apply ──────▶ SQLite /tmp/registry.db │ + (notebook) │ - feature definitions │ + │ - entity schemas │ + │ │ + materialize ──────▶ Redis online store │ + │ - latest feature values │ + │ - sub-ms latency │ + │ - persistent across sessions │ + │ │ + historical ──────▶ Parquet on PVC (offline store) │ + features │ - time-series feature data │ + │ │ + │ Feast Server pod │ + │ - HTTP API for online features │ + │ - registry on PVC (/data/...) │ + └─────────────────────────────────┘ +``` + +- **Redis** (per-namespace): your private online store. You own and manage it. +- **Registry** (SQLite): feature definitions. In notebook workflows, uses `/tmp/registry.db`. + The Feast server pod uses the registry PVC at `/data/registry/registry.db`. 
+- **Offline store** (parquet/PVC): historical feature data for training. diff --git a/feast/feast-cr.yaml b/feast/feast-cr.yaml new file mode 100644 index 0000000..7aa27fb --- /dev/null +++ b/feast/feast-cr.yaml @@ -0,0 +1,50 @@ +# Example FeatureStore CR for prokube. +# Edit the namespace to match your Kubeflow profile. +# +# The operator will create: +# - A Feast deployment + service (online feature server) +# - PVCs for the SQLite registry and offline data store +# - A ConfigMap (feast--client) with client connection info +# +# Prerequisites: +# - feast-redis-config secret must exist in your namespace (see README) +apiVersion: feast.dev/v1 +kind: FeatureStore +metadata: + name: my-store + namespace: # <-- change this +spec: + feastProject: my_features + services: + runFeastApplyOnInit: false + securityContext: + runAsUser: 0 + registry: + local: + persistence: + file: + pvc: + mountPath: /data/registry + create: + storageClassName: mayastor-no-redundancy # adjust for your cluster + resources: + requests: + storage: 1Gi + offlineStore: + persistence: + file: + type: file + pvc: + mountPath: /data/offline + create: + storageClassName: mayastor-no-redundancy # adjust for your cluster + resources: + requests: + storage: 10Gi + onlineStore: + persistence: + store: + type: redis + secretRef: + name: feast-redis-config + secretKeyName: redis diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb new file mode 100644 index 0000000..f23c2bb --- /dev/null +++ b/feast/feast_example.ipynb @@ -0,0 +1,512 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1eaa631f", + "metadata": {}, + "source": [ + "# Feast Feature Store — End-to-End Example\n", + "\n", + "This notebook walks through the full Feast workflow on prokube:\n", + "\n", + "1. Generate sample feature data\n", + "2. Configure the Feast client\n", + "3. Register features in the registry (`feast apply`)\n", + "4. Retrieve historical features for training\n", + "5. 
Define and register a **new** feature\n", + "6. Materialize features to the Redis online store\n", + "7. Serve features online for inference\n", + "\n", + "## Prerequisites\n", + "\n", + "- A `FeatureStore` CR is deployed in your namespace (see `feast-cr.yaml`)\n", + "- A `Redis` CR is deployed in your namespace (see `redis-cr.yaml`)\n", + "- The `feast-redis-config` secret exists in your namespace (see README)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b8f4c32", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -q 'feast[redis]' scikit-learn" + ] + }, + { + "cell_type": "markdown", + "id": "3956d1d1", + "metadata": {}, + "source": [ + "## 1. Generate sample data\n", + "\n", + "We create a parquet file simulating hourly driver statistics over the past 7 days.\n", + "In a real scenario this would come from your data pipeline.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2b288da", + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import os\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "np.random.seed(42)\n", + "n = 1000\n", + "now = datetime.datetime.now()\n", + "timestamps = [now - datetime.timedelta(hours=i) for i in range(n)]\n", + "\n", + "driver_df = pd.DataFrame({\n", + " \"driver_id\": np.random.choice([1001, 1002, 1003, 1004, 1005], n),\n", + " \"event_timestamp\": timestamps,\n", + " \"conv_rate\": np.random.uniform(0.1, 1.0, n).astype(np.float32),\n", + " \"acc_rate\": np.random.uniform(0.5, 1.0, n).astype(np.float32),\n", + " \"avg_daily_trips\": np.random.randint(1, 50, n).astype(np.int64),\n", + " \"created\": timestamps,\n", + "})\n", + "\n", + "os.makedirs(\"data\", exist_ok=True)\n", + "driver_df.to_parquet(\"data/driver_stats.parquet\")\n", + "print(f\"Created {n} rows for {driver_df['driver_id'].nunique()} drivers\")\n", + "driver_df.head()\n" + ] + }, + { + "cell_type": "markdown", + "id": "ef7e1942", + "metadata": 
{}, + "source": [ + "## 2. Configure the Feast client\n", + "\n", + "We build `feature_store.yaml` by reading the Redis connection string from the\n", + "`feast-redis-config` secret in our namespace.\n", + "\n", + "The registry uses a local SQLite file (`/tmp/registry.db`). This is ephemeral\n", + "within the notebook session — re-run `feast apply` at the start of each session\n", + "to repopulate it. The Redis online store is persistent across sessions.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8804025", + "metadata": {}, + "outputs": [], + "source": [ + "import base64\n", + "import subprocess\n", + "import yaml\n", + "\n", + "\n", + "def get_namespace():\n", + " \"\"\"Read the current namespace from the pod's service account.\"\"\"\n", + " try:\n", + " with open(\"/var/run/secrets/kubernetes.io/serviceaccount/namespace\") as f:\n", + " return f.read().strip()\n", + " except FileNotFoundError:\n", + " return subprocess.check_output(\n", + " [\"kubectl\", \"config\", \"view\", \"--minify\", \"-o\", \"jsonpath={..namespace}\"]\n", + " ).decode().strip()\n", + "\n", + "\n", + "def get_redis_connection_string():\n", + " \"\"\"Read the Redis connection string from the feast-redis-config secret.\n", + "\n", + " The secret has key 'redis' whose value is a YAML map:\n", + " connection_string: \"host:port,password=...\"\n", + " \"\"\"\n", + " result = subprocess.run(\n", + " [\"kubectl\", \"get\", \"secret\", \"feast-redis-config\",\n", + " \"-o\", \"jsonpath={.data.redis}\"],\n", + " capture_output=True, text=True, check=True,\n", + " )\n", + " raw = base64.b64decode(result.stdout).decode()\n", + " return yaml.safe_load(raw)[\"connection_string\"]\n", + "\n", + "\n", + "NAMESPACE = get_namespace()\n", + "REDIS_CONNECTION_STRING = get_redis_connection_string()\n", + "FEAST_PROJECT = \"my_features\" # must match spec.feastProject in your FeatureStore CR\n", + "\n", + "feature_store_yaml = (\n", + " f\"project: {FEAST_PROJECT}\\n\"\n", + " 
\"provider: local\\n\"\n", + " \"offline_store:\\n\"\n", + " \" type: file\\n\"\n", + " \"online_store:\\n\"\n", + " \" type: redis\\n\"\n", + " f\" connection_string: \\\"{REDIS_CONNECTION_STRING}\\\"\\n\"\n", + " \"registry:\\n\"\n", + " \" registry_type: file\\n\"\n", + " \" path: /tmp/registry.db\\n\"\n", + " \"auth:\\n\"\n", + " \" type: no_auth\\n\"\n", + " \"entity_key_serialization_version: 3\\n\"\n", + ")\n", + "\n", + "with open(\"feature_store.yaml\", \"w\") as f:\n", + " f.write(feature_store_yaml)\n", + "\n", + "print(\"feature_store.yaml written\")\n", + "print(f\"Namespace: {NAMESPACE}\")\n", + "# Print host:port only — hide password\n", + "print(f\"Redis: {REDIS_CONNECTION_STRING.split(',')[0]}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "358c2624", + "metadata": {}, + "source": [ + "## 3. Register features\n", + "\n", + "`feast apply` reads `features.py` and writes the entity, data source, and\n", + "feature view definitions to the local SQLite registry (`/tmp/registry.db`).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52ecbcf6", + "metadata": {}, + "outputs": [], + "source": [ + "!feast apply" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10eb1055", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify what was registered\n", + "!feast feature-views list" + ] + }, + { + "cell_type": "markdown", + "id": "c7f35090", + "metadata": {}, + "source": [ + "## 4. Retrieve historical features for training\n", + "\n", + "`get_historical_features` performs a **point-in-time join**: for each entity row,\n", + "it finds the most recent feature values as of that entity's timestamp. 
This\n", + "prevents data leakage — you only see features that were available when the\n", + "event occurred.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7796c5fc", + "metadata": {}, + "outputs": [], + "source": [ + "from feast import FeatureStore\n", + "\n", + "store = FeatureStore(repo_path=\".\")\n", + "\n", + "entity_df = pd.DataFrame({\n", + " \"driver_id\": [1001, 1002, 1003, 1004, 1005],\n", + " \"event_timestamp\": [now] * 5,\n", + "})\n", + "\n", + "training_df = store.get_historical_features(\n", + " entity_df=entity_df,\n", + " features=[\n", + " \"driver_hourly_stats:conv_rate\",\n", + " \"driver_hourly_stats:acc_rate\",\n", + " \"driver_hourly_stats:avg_daily_trips\",\n", + " ],\n", + ").to_df()\n", + "\n", + "print(\"Training data (point-in-time correct):\")\n", + "training_df\n" + ] + }, + { + "cell_type": "markdown", + "id": "5e15eee6", + "metadata": {}, + "source": [ + "## 5. Define and register a new feature\n", + "\n", + "A key part of working with Feast is evolving your feature definitions over time.\n", + "Here we add a **new computed feature** (`trips_per_hour`) by:\n", + "\n", + "1. Creating a new data source with the computed column\n", + "2. Defining a new `FeatureView` with the new feature\n", + "3. 
Running `feast apply` again to register it\n", + "\n", + "This shows the typical workflow when a data scientist discovers a useful\n", + "feature during exploration and wants to make it available for training and serving.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61f19018", + "metadata": {}, + "outputs": [], + "source": [ + "# Step 1: Create a new data source with a computed feature.\n", + "# In practice this could be a new parquet file from your pipeline,\n", + "# or a transformation of existing data.\n", + "\n", + "driver_enhanced = driver_df.copy()\n", + "\n", + "# Compute a rolling average of trips over the last 24 hours per driver\n", + "driver_enhanced = driver_enhanced.sort_values([\"driver_id\", \"event_timestamp\"])\n", + "driver_enhanced[\"trips_per_hour\"] = (\n", + " driver_enhanced\n", + " .groupby(\"driver_id\")[\"avg_daily_trips\"]\n", + " .transform(lambda x: x.rolling(24, min_periods=1).mean())\n", + " .astype(np.float32)\n", + ")\n", + "\n", + "# Also add a binary flag: is this a high-activity driver?\n", + "driver_enhanced[\"is_high_activity\"] = (\n", + " driver_enhanced[\"avg_daily_trips\"] > 30\n", + ").astype(np.int64)\n", + "\n", + "driver_enhanced.to_parquet(\"data/driver_stats_enhanced.parquet\")\n", + "print(\"New columns: trips_per_hour, is_high_activity\")\n", + "driver_enhanced[[\"driver_id\", \"event_timestamp\", \"avg_daily_trips\",\n", + " \"trips_per_hour\", \"is_high_activity\"]].head(10)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "new_feature_def", + "metadata": {}, + "outputs": [], + "source": [ + "# Step 2: Define the new feature view in Python and write it to features.py.\n", + "# We append to the existing features.py so that `feast apply` picks it up.\n", + "\n", + "new_feature_code = '''\n", + "\n", + "# --- New feature view: enhanced driver stats ---\n", + "\n", + "driver_enhanced_source = FileSource(\n", + " path=\"data/driver_stats_enhanced.parquet\",\n", + " 
timestamp_field=\"event_timestamp\",\n", + " created_timestamp_column=\"created\",\n", + ")\n", + "\n", + "driver_enhanced_stats = FeatureView(\n", + " name=\"driver_enhanced_stats\",\n", + " entities=[driver],\n", + " ttl=timedelta(days=7),\n", + " schema=[\n", + " Field(name=\"trips_per_hour\", dtype=Float32),\n", + " Field(name=\"is_high_activity\", dtype=Int64),\n", + " ],\n", + " source=driver_enhanced_source,\n", + " online=True,\n", + ")\n", + "'''\n", + "\n", + "with open(\"features.py\", \"a\") as f:\n", + " f.write(new_feature_code)\n", + "\n", + "print(\"Appended driver_enhanced_stats feature view to features.py\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "apply_new", + "metadata": {}, + "outputs": [], + "source": [ + "# Step 3: Register the new feature view\n", + "!feast apply" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "verify_new", + "metadata": {}, + "outputs": [], + "source": [ + "# Verify: we should now see two feature views\n", + "!feast feature-views list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "use_new_features", + "metadata": {}, + "outputs": [], + "source": [ + "# Use the new feature in a historical query — combine features from both views\n", + "store = FeatureStore(repo_path=\".\") # reload to pick up new definitions\n", + "\n", + "training_df_v2 = store.get_historical_features(\n", + " entity_df=entity_df,\n", + " features=[\n", + " \"driver_hourly_stats:conv_rate\",\n", + " \"driver_hourly_stats:acc_rate\",\n", + " \"driver_hourly_stats:avg_daily_trips\",\n", + " \"driver_enhanced_stats:trips_per_hour\",\n", + " \"driver_enhanced_stats:is_high_activity\",\n", + " ],\n", + ").to_df()\n", + "\n", + "print(\"Training data with original + new features:\")\n", + "training_df_v2\n" + ] + }, + { + "cell_type": "markdown", + "id": "51b15c4b", + "metadata": {}, + "source": [ + "## 6. 
Materialize features to Redis\n", + "\n", + "Materialization reads the latest feature values from the offline parquet store\n", + "and writes them to Redis for low-latency online serving.\n", + "\n", + "This materializes **all** registered feature views — including our new\n", + "`driver_enhanced_stats` view.\n", + "\n", + "In production you would run this on a schedule (e.g. hourly cron job).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6183ced7", + "metadata": {}, + "outputs": [], + "source": [ + "!feast materialize-incremental $(date -u +'%Y-%m-%dT%H:%M:%S')" + ] + }, + { + "cell_type": "markdown", + "id": "66ec423c", + "metadata": {}, + "source": [ + "## 7. Online feature serving\n", + "\n", + "Retrieve the latest feature values for specific entities. The Feast SDK reads\n", + "directly from Redis — no round-trip through the Feast server pod is needed.\n", + "\n", + "This is what you call at inference time: given a `driver_id`, get their\n", + "current features (including the new ones) to feed into the model.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6982cdba", + "metadata": {}, + "outputs": [], + "source": [ + "online_features = store.get_online_features(\n", + " features=[\n", + " \"driver_hourly_stats:conv_rate\",\n", + " \"driver_hourly_stats:acc_rate\",\n", + " \"driver_hourly_stats:avg_daily_trips\",\n", + " \"driver_enhanced_stats:trips_per_hour\",\n", + " \"driver_enhanced_stats:is_high_activity\",\n", + " ],\n", + " entity_rows=[{\"driver_id\": 1001}, {\"driver_id\": 1002}],\n", + ").to_dict()\n", + "\n", + "print(\"Online features (latest values from Redis):\")\n", + "for k, v in online_features.items():\n", + " print(f\" {k}: {v}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72e36e86", + "metadata": {}, + "outputs": [], + "source": [ + "# Quick model training using features from both views\n", + "from sklearn.linear_model import LinearRegression\n", + "\n", 
+ "FEATURE_COLS = [\"acc_rate\", \"avg_daily_trips\", \"trips_per_hour\", \"is_high_activity\"]\n", + "TARGET = \"conv_rate\"\n", + "\n", + "X = training_df_v2[FEATURE_COLS].fillna(0)\n", + "y = training_df_v2[TARGET].fillna(0)\n", + "\n", + "model = LinearRegression().fit(X, y)\n", + "\n", + "# Use online features for inference\n", + "inference_df = pd.DataFrame(online_features)\n", + "predictions = model.predict(inference_df[FEATURE_COLS])\n", + "\n", + "for driver_id, pred in zip(inference_df[\"driver_id\"], predictions):\n", + " print(f\"Driver {driver_id}: predicted conv_rate = {pred:.4f}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "72a52751", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "| Step | Command / API | Purpose |\n", + "|------|--------------|--------|\n", + "| Define features | `features.py` | Declare entities, sources, feature views |\n", + "| Register | `feast apply` | Write definitions to local SQLite registry |\n", + "| Training data | `store.get_historical_features()` | Point-in-time correct join from parquet |\n", + "| Add new features | Edit `features.py` + `feast apply` | Evolve feature definitions over time |\n", + "| Materialize | `feast materialize-incremental` | Push latest values to Redis online store |\n", + "| Online serving | `store.get_online_features()` | Sub-ms lookup from Redis by entity key |\n", + "\n", + "### Key takeaway: Adding new features\n", + "\n", + "The workflow for adding a new feature to Feast is:\n", + "1. Create/update the data source (parquet, database table, etc.)\n", + "2. Define a new `FeatureView` (or add fields to an existing one) in `features.py`\n", + "3. Run `feast apply` to register the new definitions\n", + "4. 
Run `feast materialize-incremental` to push values to the online store\n", + "\n", + "Once registered, the new features are immediately available via both\n", + "`get_historical_features()` (training) and `get_online_features()` (serving).\n", + "\n", + "### Note on the SQLite registry\n", + "\n", + "This notebook uses `/tmp/registry.db` as the registry, which is local to the\n", + "notebook session. Re-run the \"Configure\" and `feast apply` cells at the start\n", + "of each new session. The Redis online store is persistent across sessions —\n", + "features materialized in one session are still available in subsequent ones.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/feast/feature_store.yaml b/feast/feature_store.yaml new file mode 100644 index 0000000..0d6b181 --- /dev/null +++ b/feast/feature_store.yaml @@ -0,0 +1,19 @@ +# Template — fill in your Redis connection details. +# Get the Redis host and password from your admin. +# +# For workflows running inside the cluster, you can use /tmp/registry.db +# as the registry path (ephemeral, single-run). For persistent registry +# access, mount the registry PVC and use /data/registry/registry.db. +project: my_features +provider: local +offline_store: + type: file +online_store: + type: redis + connection_string: ":6379,password=" +registry: + registry_type: file + path: /tmp/registry.db +auth: + type: no_auth +entity_key_serialization_version: 3 diff --git a/feast/features.py b/feast/features.py new file mode 100644 index 0000000..8116f89 --- /dev/null +++ b/feast/features.py @@ -0,0 +1,50 @@ +""" +Feast feature definitions for the driver stats example. 
+ +This file defines: +- An entity (driver_id) identifying what we're tracking features for +- A data source (parquet file with historical driver data) +- A feature view (driver_hourly_stats) with three features + +To register these with the Feast registry: + feast apply + +To materialize features to the online store: + feast materialize-incremental $(date -u +"%Y-%m-%dT%H:%M:%S") +""" + +from datetime import timedelta + +from feast import Entity, FeatureView, Field, FileSource +from feast.types import Float32, Int64 + +# Entity: the "primary key" for feature lookups. +# When you request features, you provide entity values (e.g. driver_id=1001). +driver = Entity( + name="driver_id", + description="Unique driver identifier", +) + +# Data source: where historical feature data lives. +# This parquet file is generated by the notebook example. +driver_stats_source = FileSource( + path="data/driver_stats.parquet", + timestamp_field="event_timestamp", + created_timestamp_column="created", +) + +# Feature view: a logical group of features from one data source. +# - `ttl` controls how stale a feature can be before it's considered expired +# - `online=True` means features are materialized to the online store +driver_hourly_stats = FeatureView( + name="driver_hourly_stats", + entities=[driver], + ttl=timedelta(days=7), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), + ], + source=driver_stats_source, + online=True, +) diff --git a/feast/redis-cr.yaml b/feast/redis-cr.yaml new file mode 100644 index 0000000..df50c01 --- /dev/null +++ b/feast/redis-cr.yaml @@ -0,0 +1,37 @@ +# Redis instance for Feast online store. +# Replace with your Kubeflow profile namespace. 
+# +# Before applying, create the password secret: +# kubectl create secret generic redis-feast \ +# -n \ +# --from-literal=password=$(openssl rand -base64 24 | tr -d '/') +apiVersion: redis.redis.opstreelabs.in/v1beta2 +kind: Redis +metadata: + name: redis-feast + namespace: # <-- change this +spec: + kubernetesConfig: + image: quay.io/opstree/redis:v7.0.15 + imagePullPolicy: IfNotPresent + redisSecret: + name: redis-feast + key: password + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + podSecurityContext: + fsGroup: 1000 + runAsUser: 1000 + storage: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi From 8ae765ca6f893ae199fadc17bb008032025c00a1 Mon Sep 17 00:00:00 2001 From: hsteude Date: Mon, 4 May 2026 15:32:17 +0200 Subject: [PATCH 02/22] Simplify notebook: inline feature definitions, single sequential flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove features.py — all definitions live in the notebook - Replace CLI-based feast apply with store.apply() Python API - Add On Demand Feature View (ODFV) example for derived features - Explain why /tmp/registry.db is ephemeral and why that's OK - Single linear flow: setup → data → define → register → train → materialize → serve --- feast/README.md | 7 +- feast/feast_example.ipynb | 489 +++++++++++++++++++------------------- feast/features.py | 50 ---- 3 files changed, 242 insertions(+), 304 deletions(-) delete mode 100644 feast/features.py diff --git a/feast/README.md b/feast/README.md index 85f2e8f..5d0b0da 100644 --- a/feast/README.md +++ b/feast/README.md @@ -63,9 +63,8 @@ Open `feast_example.ipynb` in your Kubeflow notebook. 
The notebook reads the |------|------------| | `redis-cr.yaml` | Kubernetes manifest — deploys a Redis instance (OpsTree operator) | | `feast-cr.yaml` | Kubernetes manifest — deploys the FeatureStore CR | -| `feature_store.yaml` | Feast SDK config — tells the Python client where registry and stores are | -| `features.py` | Feature definitions — entities, data sources, feature views | -| `feast_example.ipynb` | End-to-end notebook: generate data, apply, define new features, materialize, serve | +| `feature_store.yaml` | Feast SDK config template — the notebook generates this automatically | +| `feast_example.ipynb` | End-to-end notebook: define features, register, train, materialize, serve | ### Why two YAML files? @@ -97,7 +96,7 @@ and is rarely needed. │ Redis CR (redis-feast) │ │ - your private Redis instance │ │ │ - feast apply ──────▶ SQLite /tmp/registry.db │ +store.apply() ─────▶ SQLite /tmp/registry.db │ (notebook) │ - feature definitions │ │ - entity schemas │ │ │ diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index f23c2bb..dc7fdeb 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -5,77 +5,46 @@ "id": "1eaa631f", "metadata": {}, "source": [ - "# Feast Feature Store — End-to-End Example\n", + "# Feast Feature Store on prokube\n", "\n", - "This notebook walks through the full Feast workflow on prokube:\n", + "This notebook walks through the full Feast workflow:\n", "\n", - "1. Generate sample feature data\n", - "2. Configure the Feast client\n", - "3. Register features in the registry (`feast apply`)\n", - "4. Retrieve historical features for training\n", - "5. Define and register a **new** feature\n", - "6. Materialize features to the Redis online store\n", - "7. Serve features online for inference\n", + "1. **Setup** — install dependencies, configure the Feast client\n", + "2. **Generate data** — create sample driver statistics\n", + "3. 
**Define features** — entities, feature views, and on-demand transformations\n", + "4. **Register** — push definitions to the Feast registry\n", + "5. **Train** — retrieve historical features with point-in-time correctness\n", + "6. **Materialize** — push latest values to Redis for online serving\n", + "7. **Serve** — retrieve features at inference time\n", "\n", - "## Prerequisites\n", + "Everything happens inline in this notebook — no external Python files or CLI\n", + "commands needed.\n", "\n", - "- A `FeatureStore` CR is deployed in your namespace (see `feast-cr.yaml`)\n", - "- A `Redis` CR is deployed in your namespace (see `redis-cr.yaml`)\n", - "- The `feast-redis-config` secret exists in your namespace (see README)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4b8f4c32", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install -q 'feast[redis]' scikit-learn" + "### Prerequisites\n", + "\n", + "Before running this notebook, make sure you have deployed:\n", + "- A **Redis** instance in your namespace (`redis-cr.yaml`)\n", + "- A **FeatureStore** CR in your namespace (`feast-cr.yaml`)\n", + "- The **feast-redis-config** secret (see `README.md` for setup steps)\n" ] }, { "cell_type": "markdown", - "id": "3956d1d1", + "id": "setup_header", "metadata": {}, "source": [ - "## 1. Generate sample data\n", - "\n", - "We create a parquet file simulating hourly driver statistics over the past 7 days.\n", - "In a real scenario this would come from your data pipeline.\n" + "---\n", + "## 1. 
Setup\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "f2b288da", + "id": "4b8f4c32", "metadata": {}, "outputs": [], "source": [ - "import datetime\n", - "import os\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "np.random.seed(42)\n", - "n = 1000\n", - "now = datetime.datetime.now()\n", - "timestamps = [now - datetime.timedelta(hours=i) for i in range(n)]\n", - "\n", - "driver_df = pd.DataFrame({\n", - " \"driver_id\": np.random.choice([1001, 1002, 1003, 1004, 1005], n),\n", - " \"event_timestamp\": timestamps,\n", - " \"conv_rate\": np.random.uniform(0.1, 1.0, n).astype(np.float32),\n", - " \"acc_rate\": np.random.uniform(0.5, 1.0, n).astype(np.float32),\n", - " \"avg_daily_trips\": np.random.randint(1, 50, n).astype(np.int64),\n", - " \"created\": timestamps,\n", - "})\n", - "\n", - "os.makedirs(\"data\", exist_ok=True)\n", - "driver_df.to_parquet(\"data/driver_stats.parquet\")\n", - "print(f\"Created {n} rows for {driver_df['driver_id'].nunique()} drivers\")\n", - "driver_df.head()\n" + "!pip install -q 'feast[redis]' scikit-learn" ] }, { @@ -83,14 +52,18 @@ "id": "ef7e1942", "metadata": {}, "source": [ - "## 2. Configure the Feast client\n", - "\n", - "We build `feature_store.yaml` by reading the Redis connection string from the\n", - "`feast-redis-config` secret in our namespace.\n", - "\n", - "The registry uses a local SQLite file (`/tmp/registry.db`). This is ephemeral\n", - "within the notebook session — re-run `feast apply` at the start of each session\n", - "to repopulate it. The Redis online store is persistent across sessions.\n" + "### Configure the Feast client\n", + "\n", + "We build `feature_store.yaml` dynamically by reading the Redis connection\n", + "string from the `feast-redis-config` secret.\n", + "\n", + "**About the registry path (`/tmp/registry.db`):**\n", + "The registry is a small SQLite database that stores feature definitions\n", + "(entities, feature views, data sources). 
We write it to `/tmp` which means\n", + "it does **not** survive a pod restart. That's fine — the registry only holds\n", + "*definitions*, not data. Your actual feature *values* live in Redis (online\n", + "store, persistent) and parquet files (offline store, on PVC). Just re-run\n", + "the \"Define & Register\" cell after a restart to recreate it.\n" ] }, { @@ -117,11 +90,7 @@ "\n", "\n", "def get_redis_connection_string():\n", - " \"\"\"Read the Redis connection string from the feast-redis-config secret.\n", - "\n", - " The secret has key 'redis' whose value is a YAML map:\n", - " connection_string: \"host:port,password=...\"\n", - " \"\"\"\n", + " \"\"\"Read the Redis connection string from the feast-redis-config secret.\"\"\"\n", " result = subprocess.run(\n", " [\"kubectl\", \"get\", \"secret\", \"feast-redis-config\",\n", " \"-o\", \"jsonpath={.data.redis}\"],\n", @@ -133,7 +102,7 @@ "\n", "NAMESPACE = get_namespace()\n", "REDIS_CONNECTION_STRING = get_redis_connection_string()\n", - "FEAST_PROJECT = \"my_features\" # must match spec.feastProject in your FeatureStore CR\n", + "FEAST_PROJECT = \"my_features\"\n", "\n", "feature_store_yaml = (\n", " f\"project: {FEAST_PROJECT}\\n\"\n", @@ -156,219 +125,248 @@ "\n", "print(\"feature_store.yaml written\")\n", "print(f\"Namespace: {NAMESPACE}\")\n", - "# Print host:port only — hide password\n", "print(f\"Redis: {REDIS_CONNECTION_STRING.split(',')[0]}\")\n" ] }, { "cell_type": "markdown", - "id": "358c2624", + "id": "3956d1d1", "metadata": {}, "source": [ - "## 3. Register features\n", + "---\n", + "## 2. Generate sample data\n", "\n", - "`feast apply` reads `features.py` and writes the entity, data source, and\n", - "feature view definitions to the local SQLite registry (`/tmp/registry.db`).\n" + "We create a parquet file simulating hourly driver statistics over the past\n", + "7 days. 
In a real project this would come from your data pipeline.\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "52ecbcf6", - "metadata": {}, - "outputs": [], - "source": [ - "!feast apply" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10eb1055", + "id": "f2b288da", "metadata": {}, "outputs": [], "source": [ - "# Verify what was registered\n", - "!feast feature-views list" - ] - }, - { - "cell_type": "markdown", - "id": "c7f35090", - "metadata": {}, - "source": [ - "## 4. Retrieve historical features for training\n", + "import datetime\n", + "import os\n", "\n", - "`get_historical_features` performs a **point-in-time join**: for each entity row,\n", - "it finds the most recent feature values as of that entity's timestamp. This\n", - "prevents data leakage — you only see features that were available when the\n", - "event occurred.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7796c5fc", - "metadata": {}, - "outputs": [], - "source": [ - "from feast import FeatureStore\n", + "import numpy as np\n", + "import pandas as pd\n", "\n", - "store = FeatureStore(repo_path=\".\")\n", + "np.random.seed(42)\n", + "n = 1000\n", + "now = datetime.datetime.now()\n", + "timestamps = [now - datetime.timedelta(hours=i) for i in range(n)]\n", "\n", - "entity_df = pd.DataFrame({\n", - " \"driver_id\": [1001, 1002, 1003, 1004, 1005],\n", - " \"event_timestamp\": [now] * 5,\n", + "driver_df = pd.DataFrame({\n", + " \"driver_id\": np.random.choice([1001, 1002, 1003, 1004, 1005], n),\n", + " \"event_timestamp\": timestamps,\n", + " \"conv_rate\": np.random.uniform(0.1, 1.0, n).astype(np.float32),\n", + " \"acc_rate\": np.random.uniform(0.5, 1.0, n).astype(np.float32),\n", + " \"avg_daily_trips\": np.random.randint(1, 50, n).astype(np.int64),\n", + " \"created\": timestamps,\n", "})\n", "\n", - "training_df = store.get_historical_features(\n", - " entity_df=entity_df,\n", - " features=[\n", - " 
\"driver_hourly_stats:conv_rate\",\n", - " \"driver_hourly_stats:acc_rate\",\n", - " \"driver_hourly_stats:avg_daily_trips\",\n", - " ],\n", - ").to_df()\n", - "\n", - "print(\"Training data (point-in-time correct):\")\n", - "training_df\n" + "os.makedirs(\"data\", exist_ok=True)\n", + "driver_df.to_parquet(\"data/driver_stats.parquet\")\n", + "print(f\"Created {n} rows for {driver_df['driver_id'].nunique()} drivers\")\n", + "driver_df.head()\n" ] }, { "cell_type": "markdown", - "id": "5e15eee6", + "id": "358c2624", "metadata": {}, "source": [ - "## 5. Define and register a new feature\n", + "---\n", + "## 3. Define features\n", "\n", - "A key part of working with Feast is evolving your feature definitions over time.\n", - "Here we add a **new computed feature** (`trips_per_hour`) by:\n", + "In Feast you define features as Python objects. There are two kinds:\n", "\n", - "1. Creating a new data source with the computed column\n", - "2. Defining a new `FeatureView` with the new feature\n", - "3. 
Running `feast apply` again to register it\n", - "\n", - "This shows the typical workflow when a data scientist discovers a useful\n", - "feature during exploration and wants to make it available for training and serving.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "61f19018", - "metadata": {}, - "outputs": [], - "source": [ - "# Step 1: Create a new data source with a computed feature.\n", - "# In practice this could be a new parquet file from your pipeline,\n", - "# or a transformation of existing data.\n", - "\n", - "driver_enhanced = driver_df.copy()\n", - "\n", - "# Compute a rolling average of trips over the last 24 hours per driver\n", - "driver_enhanced = driver_enhanced.sort_values([\"driver_id\", \"event_timestamp\"])\n", - "driver_enhanced[\"trips_per_hour\"] = (\n", - " driver_enhanced\n", - " .groupby(\"driver_id\")[\"avg_daily_trips\"]\n", - " .transform(lambda x: x.rolling(24, min_periods=1).mean())\n", - " .astype(np.float32)\n", - ")\n", + "- **FeatureView**: maps to columns in an existing data source (parquet file,\n", + " database table, etc.). Feast stores and serves them — but doesn't compute\n", + " anything. Your pipeline is responsible for producing the data.\n", "\n", - "# Also add a binary flag: is this a high-activity driver?\n", - "driver_enhanced[\"is_high_activity\"] = (\n", - " driver_enhanced[\"avg_daily_trips\"] > 30\n", - ").astype(np.int64)\n", + "- **On Demand Feature View (ODFV)**: a lightweight transformation that Feast\n", + " executes at query time. It can combine existing features, add request-time\n", + " inputs, or compute derived values. 
The transformation runs inline — during\n", + " `get_historical_features()` and `get_online_features()` — so it's always\n", + " consistent between training and serving.\n", "\n", - "driver_enhanced.to_parquet(\"data/driver_stats_enhanced.parquet\")\n", - "print(\"New columns: trips_per_hour, is_high_activity\")\n", - "driver_enhanced[[\"driver_id\", \"event_timestamp\", \"avg_daily_trips\",\n", - " \"trips_per_hour\", \"is_high_activity\"]].head(10)\n" + "Use FeatureViews for raw/precomputed data. Use ODFVs for derived features\n", + "that should be computed the same way everywhere.\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "new_feature_def", + "id": "define_features", "metadata": {}, "outputs": [], "source": [ - "# Step 2: Define the new feature view in Python and write it to features.py.\n", - "# We append to the existing features.py so that `feast apply` picks it up.\n", - "\n", - "new_feature_code = '''\n", - "\n", - "# --- New feature view: enhanced driver stats ---\n", + "from datetime import timedelta\n", + "\n", + "from feast import Entity, FeatureStore, FeatureView, Field, FileSource, RequestSource\n", + "from feast.on_demand_feature_view import on_demand_feature_view\n", + "from feast.types import Float32, Float64, Int64\n", + "\n", + "# ---------------------------------------------------------------------------\n", + "# Entity: the \"primary key\" for feature lookups.\n", + "# When you request features, you provide entity values (e.g. 
driver_id=1001).\n", + "# ---------------------------------------------------------------------------\n", + "driver = Entity(\n", + " name=\"driver_id\",\n", + " description=\"Unique driver identifier\",\n", + ")\n", "\n", - "driver_enhanced_source = FileSource(\n", - " path=\"data/driver_stats_enhanced.parquet\",\n", + "# ---------------------------------------------------------------------------\n", + "# Data source: points to where historical data lives.\n", + "# ---------------------------------------------------------------------------\n", + "driver_stats_source = FileSource(\n", + " path=\"data/driver_stats.parquet\",\n", " timestamp_field=\"event_timestamp\",\n", " created_timestamp_column=\"created\",\n", ")\n", "\n", - "driver_enhanced_stats = FeatureView(\n", - " name=\"driver_enhanced_stats\",\n", + "# ---------------------------------------------------------------------------\n", + "# FeatureView: declares which columns from the source are features.\n", + "# These are raw/precomputed values — Feast just stores and serves them.\n", + "# ---------------------------------------------------------------------------\n", + "driver_hourly_stats = FeatureView(\n", + " name=\"driver_hourly_stats\",\n", " entities=[driver],\n", " ttl=timedelta(days=7),\n", " schema=[\n", - " Field(name=\"trips_per_hour\", dtype=Float32),\n", - " Field(name=\"is_high_activity\", dtype=Int64),\n", + " Field(name=\"conv_rate\", dtype=Float32),\n", + " Field(name=\"acc_rate\", dtype=Float32),\n", + " Field(name=\"avg_daily_trips\", dtype=Int64),\n", " ],\n", - " source=driver_enhanced_source,\n", + " source=driver_stats_source,\n", " online=True,\n", ")\n", - "'''\n", "\n", - "with open(\"features.py\", \"a\") as f:\n", - " f.write(new_feature_code)\n", + "# ---------------------------------------------------------------------------\n", + "# On Demand Feature View: a derived feature computed by Feast at query time.\n", + "#\n", + "# \"efficiency\" = conv_rate / acc_rate\n", + "#\n", + 
"# This transformation runs automatically when you request this feature —\n", + "# both during training (get_historical_features) and serving\n", + "# (get_online_features). You define it once; Feast guarantees consistency.\n", + "# ---------------------------------------------------------------------------\n", + "@on_demand_feature_view(\n", + " sources=[driver_hourly_stats],\n", + " schema=[\n", + " Field(name=\"efficiency\", dtype=Float64),\n", + " ],\n", + " mode=\"pandas\",\n", + ")\n", + "def driver_efficiency(features_df: pd.DataFrame) -> pd.DataFrame:\n", + " df = pd.DataFrame()\n", + " df[\"efficiency\"] = features_df[\"conv_rate\"] / features_df[\"acc_rate\"]\n", + " return df\n", + "\n", "\n", - "print(\"Appended driver_enhanced_stats feature view to features.py\")\n" + "print(\"Feature definitions created (not yet registered).\")\n" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "apply_new", + "cell_type": "markdown", + "id": "register_header", "metadata": {}, - "outputs": [], "source": [ - "# Step 3: Register the new feature view\n", - "!feast apply" + "---\n", + "## 4. Register features\n", + "\n", + "`store.apply()` writes all definitions to the registry. 
After this, Feast\n", + "knows which features exist, where the data comes from, and how derived\n", + "features are computed.\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "verify_new", + "id": "apply_features", "metadata": {}, "outputs": [], "source": [ - "# Verify: we should now see two feature views\n", - "!feast feature-views list" + "store = FeatureStore(repo_path=\".\")\n", + "\n", + "store.apply([\n", + " driver,\n", + " driver_stats_source,\n", + " driver_hourly_stats,\n", + " driver_efficiency,\n", + "])\n", + "\n", + "print(\"Registered:\")\n", + "for fv in store.list_feature_views():\n", + " print(f\" FeatureView: {fv.name}\")\n", + "for odfv in store.list_on_demand_feature_views():\n", + " print(f\" OnDemandFeatureView: {odfv.name}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "c7f35090", + "metadata": {}, + "source": [ + "---\n", + "## 5. Retrieve historical features for training\n", + "\n", + "`get_historical_features()` performs a **point-in-time join**: for each entity\n", + "row, it finds the most recent feature values *as of that timestamp*. This\n", + "prevents data leakage — you only see features that were available when the\n", + "event occurred.\n", + "\n", + "Note how we request both raw features (`driver_hourly_stats:conv_rate`) and\n", + "the derived feature (`driver_efficiency:efficiency`). 
The ODFV runs\n", + "automatically — no extra code needed.\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "use_new_features", + "id": "7796c5fc", "metadata": {}, "outputs": [], "source": [ - "# Use the new feature in a historical query — combine features from both views\n", - "store = FeatureStore(repo_path=\".\") # reload to pick up new definitions\n", + "entity_df = pd.DataFrame({\n", + " \"driver_id\": [1001, 1002, 1003, 1004, 1005],\n", + " \"event_timestamp\": [now] * 5,\n", + "})\n", "\n", - "training_df_v2 = store.get_historical_features(\n", + "training_df = store.get_historical_features(\n", " entity_df=entity_df,\n", " features=[\n", " \"driver_hourly_stats:conv_rate\",\n", " \"driver_hourly_stats:acc_rate\",\n", " \"driver_hourly_stats:avg_daily_trips\",\n", - " \"driver_enhanced_stats:trips_per_hour\",\n", - " \"driver_enhanced_stats:is_high_activity\",\n", + " \"driver_efficiency:efficiency\", # <-- computed by Feast\n", " ],\n", ").to_df()\n", "\n", - "print(\"Training data with original + new features:\")\n", - "training_df_v2\n" + "print(\"Training data (point-in-time correct, incl. derived features):\")\n", + "training_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "train_model", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "\n", + "FEATURE_COLS = [\"acc_rate\", \"avg_daily_trips\", \"efficiency\"]\n", + "TARGET = \"conv_rate\"\n", + "\n", + "X = training_df[FEATURE_COLS].fillna(0)\n", + "y = training_df[TARGET].fillna(0)\n", + "\n", + "model = LinearRegression().fit(X, y)\n", + "print(f\"Model trained on {len(X)} samples.\")\n" ] }, { @@ -376,15 +374,15 @@ "id": "51b15c4b", "metadata": {}, "source": [ + "---\n", "## 6. 
Materialize features to Redis\n", "\n", - "Materialization reads the latest feature values from the offline parquet store\n", - "and writes them to Redis for low-latency online serving.\n", + "Materialization copies the latest feature values from the offline store\n", + "(parquet) into Redis for low-latency online serving.\n", "\n", - "This materializes **all** registered feature views — including our new\n", - "`driver_enhanced_stats` view.\n", - "\n", - "In production you would run this on a schedule (e.g. hourly cron job).\n" + "In production you would run this on a schedule (e.g. hourly cron job).\n", + "Note: only `FeatureView` data is materialized. ODFV transformations are\n", + "computed on-the-fly at query time.\n" ] }, { @@ -394,7 +392,13 @@ "metadata": {}, "outputs": [], "source": [ - "!feast materialize-incremental $(date -u +'%Y-%m-%dT%H:%M:%S')" + "from datetime import datetime, timedelta\n", + "\n", + "store.materialize(\n", + " start_date=datetime.now() - timedelta(days=7),\n", + " end_date=datetime.now(),\n", + ")\n", + "print(\"Materialized to Redis.\")\n" ] }, { @@ -402,13 +406,12 @@ "id": "66ec423c", "metadata": {}, "source": [ + "---\n", "## 7. Online feature serving\n", "\n", - "Retrieve the latest feature values for specific entities. The Feast SDK reads\n", - "directly from Redis — no round-trip through the Feast server pod is needed.\n", - "\n", - "This is what you call at inference time: given a `driver_id`, get their\n", - "current features (including the new ones) to feed into the model.\n" + "Retrieve the latest feature values for specific drivers. This is what you\n", + "call at inference time. 
The raw features come from Redis; the `efficiency`\n", + "feature is computed on-the-fly by the ODFV — same formula as during training.\n" ] }, { @@ -423,13 +426,12 @@ " \"driver_hourly_stats:conv_rate\",\n", " \"driver_hourly_stats:acc_rate\",\n", " \"driver_hourly_stats:avg_daily_trips\",\n", - " \"driver_enhanced_stats:trips_per_hour\",\n", - " \"driver_enhanced_stats:is_high_activity\",\n", + " \"driver_efficiency:efficiency\",\n", " ],\n", " entity_rows=[{\"driver_id\": 1001}, {\"driver_id\": 1002}],\n", ").to_dict()\n", "\n", - "print(\"Online features (latest values from Redis):\")\n", + "print(\"Online features (from Redis + ODFV):\")\n", "for k, v in online_features.items():\n", " print(f\" {k}: {v}\")\n" ] @@ -441,17 +443,6 @@ "metadata": {}, "outputs": [], "source": [ - "# Quick model training using features from both views\n", - "from sklearn.linear_model import LinearRegression\n", - "\n", - "FEATURE_COLS = [\"acc_rate\", \"avg_daily_trips\", \"trips_per_hour\", \"is_high_activity\"]\n", - "TARGET = \"conv_rate\"\n", - "\n", - "X = training_df_v2[FEATURE_COLS].fillna(0)\n", - "y = training_df_v2[TARGET].fillna(0)\n", - "\n", - "model = LinearRegression().fit(X, y)\n", - "\n", "# Use online features for inference\n", "inference_df = pd.DataFrame(online_features)\n", "predictions = model.predict(inference_df[FEATURE_COLS])\n", @@ -465,34 +456,32 @@ "id": "72a52751", "metadata": {}, "source": [ + "---\n", "## Summary\n", "\n", - "| Step | Command / API | Purpose |\n", - "|------|--------------|--------|\n", - "| Define features | `features.py` | Declare entities, sources, feature views |\n", - "| Register | `feast apply` | Write definitions to local SQLite registry |\n", - "| Training data | `store.get_historical_features()` | Point-in-time correct join from parquet |\n", - "| Add new features | Edit `features.py` + `feast apply` | Evolve feature definitions over time |\n", - "| Materialize | `feast materialize-incremental` | Push latest values to Redis 
online store |\n", - "| Online serving | `store.get_online_features()` | Sub-ms lookup from Redis by entity key |\n", - "\n", - "### Key takeaway: Adding new features\n", - "\n", - "The workflow for adding a new feature to Feast is:\n", - "1. Create/update the data source (parquet, database table, etc.)\n", - "2. Define a new `FeatureView` (or add fields to an existing one) in `features.py`\n", - "3. Run `feast apply` to register the new definitions\n", - "4. Run `feast materialize-incremental` to push values to the online store\n", - "\n", - "Once registered, the new features are immediately available via both\n", - "`get_historical_features()` (training) and `get_online_features()` (serving).\n", - "\n", - "### Note on the SQLite registry\n", - "\n", - "This notebook uses `/tmp/registry.db` as the registry, which is local to the\n", - "notebook session. Re-run the \"Configure\" and `feast apply` cells at the start\n", - "of each new session. The Redis online store is persistent across sessions —\n", - "features materialized in one session are still available in subsequent ones.\n" + "| Step | API | What happens |\n", + "|------|-----|-------------|\n", + "| Define | Python objects (Entity, FeatureView, ODFV) | Declare what features exist and how derived ones are computed |\n", + "| Register | `store.apply([...])` | Write definitions to the registry (once per session) |\n", + "| Train | `store.get_historical_features()` | Point-in-time join from parquet; ODFVs run inline |\n", + "| Materialize | `store.materialize()` | Push latest raw values to Redis |\n", + "| Serve | `store.get_online_features()` | Sub-ms lookup from Redis; ODFVs run inline |\n", + "\n", + "### FeatureView vs On Demand Feature View\n", + "\n", + "| | FeatureView | On Demand Feature View |\n", + "|-|-------------|------------------------|\n", + "| Data | Precomputed in your pipeline | Computed by Feast at query time |\n", + "| Materialized to Redis? 
| Yes | No (computed on-the-fly) |\n", + "| Good for | Raw/heavy features | Lightweight derived features, request-time inputs |\n", + "| Consistency | You ensure pipeline runs | Feast guarantees same logic in training & serving |\n", + "\n", + "### About the registry\n", + "\n", + "The registry (`/tmp/registry.db`) stores only *definitions* — not feature\n", + "values. It is ephemeral in this notebook setup. If your pod restarts, re-run\n", + "the \"Define & Register\" cells. Your feature *data* in Redis and on PVC is\n", + "not affected.\n" ] } ], diff --git a/feast/features.py b/feast/features.py deleted file mode 100644 index 8116f89..0000000 --- a/feast/features.py +++ /dev/null @@ -1,50 +0,0 @@ -""" -Feast feature definitions for the driver stats example. - -This file defines: -- An entity (driver_id) identifying what we're tracking features for -- A data source (parquet file with historical driver data) -- A feature view (driver_hourly_stats) with three features - -To register these with the Feast registry: - feast apply - -To materialize features to the online store: - feast materialize-incremental $(date -u +"%Y-%m-%dT%H:%M:%S") -""" - -from datetime import timedelta - -from feast import Entity, FeatureView, Field, FileSource -from feast.types import Float32, Int64 - -# Entity: the "primary key" for feature lookups. -# When you request features, you provide entity values (e.g. driver_id=1001). -driver = Entity( - name="driver_id", - description="Unique driver identifier", -) - -# Data source: where historical feature data lives. -# This parquet file is generated by the notebook example. -driver_stats_source = FileSource( - path="data/driver_stats.parquet", - timestamp_field="event_timestamp", - created_timestamp_column="created", -) - -# Feature view: a logical group of features from one data source. 
-# - `ttl` controls how stale a feature can be before it's considered expired -# - `online=True` means features are materialized to the online store -driver_hourly_stats = FeatureView( - name="driver_hourly_stats", - entities=[driver], - ttl=timedelta(days=7), - schema=[ - Field(name="conv_rate", dtype=Float32), - Field(name="acc_rate", dtype=Float32), - Field(name="avg_daily_trips", dtype=Int64), - ], - source=driver_stats_source, - online=True, -) From aab21ceb058d542eb949f9cce5bc10cd98e597e7 Mon Sep 17 00:00:00 2001 From: hsteude Date: Mon, 4 May 2026 15:43:17 +0200 Subject: [PATCH 03/22] Improve readability: verbose entity query, fix deprecation warning, explain ODFV warning - Add value_type=ValueType.INT64 to Entity to fix DeprecationWarning - Make entity_df construction more verbose with comments explaining the query - Add note about ODFV RuntimeWarning (experimental, irrelevant at this scale) --- feast/feast_example.ipynb | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index dc7fdeb..6ab5b43 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -206,7 +206,7 @@ "source": [ "from datetime import timedelta\n", "\n", - "from feast import Entity, FeatureStore, FeatureView, Field, FileSource, RequestSource\n", + "from feast import Entity, FeatureStore, FeatureView, Field, FileSource, RequestSource, ValueType\n", "from feast.on_demand_feature_view import on_demand_feature_view\n", "from feast.types import Float32, Float64, Int64\n", "\n", @@ -216,6 +216,7 @@ "# ---------------------------------------------------------------------------\n", "driver = Entity(\n", " name=\"driver_id\",\n", + " value_type=ValueType.INT64,\n", " description=\"Unique driver identifier\",\n", ")\n", "\n", @@ -321,7 +322,13 @@ "\n", "Note how we request both raw features (`driver_hourly_stats:conv_rate`) and\n", "the derived feature (`driver_efficiency:efficiency`). 
The ODFV runs\n", - "automatically — no extra code needed.\n" + "automatically — no extra code needed.\n", + "\n", + "> **Note:** Feast will show a `RuntimeWarning` that on-demand feature views\n", + "> are experimental and don't scale well for offline retrieval. For this\n", + "> notebook-sized dataset that's irrelevant. For large-scale training data\n", + "> (millions of rows), precompute heavy features in your pipeline and use a\n", + "> regular `FeatureView` instead.\n" ] }, { @@ -331,11 +338,22 @@ "metadata": {}, "outputs": [], "source": [ + "# Build a query: \"give me features for these drivers, as of this point in time.\"\n", + "# Each row says: I want to know the feature values for driver X at time T.\n", + "# Feast will find the most recent feature values that were available at that\n", + "# timestamp — this is the \"point-in-time join\" that prevents data leakage.\n", + "\n", + "drivers_to_query = [1001, 1002, 1003, 1004, 1005]\n", + "query_timestamp = now # \"as of right now\"\n", + "\n", "entity_df = pd.DataFrame({\n", - " \"driver_id\": [1001, 1002, 1003, 1004, 1005],\n", - " \"event_timestamp\": [now] * 5,\n", + " \"driver_id\": drivers_to_query,\n", + " \"event_timestamp\": [query_timestamp] * len(drivers_to_query),\n", "})\n", "\n", + "print(\"Query: get features for these drivers as of this timestamp:\")\n", + "print(entity_df.to_string(index=False))\n", + "\n", "training_df = store.get_historical_features(\n", " entity_df=entity_df,\n", " features=[\n", From 17a89372f60b6f7bf112b3c8172d8accd733e92d Mon Sep 17 00:00:00 2001 From: hsteude Date: Mon, 4 May 2026 15:44:26 +0200 Subject: [PATCH 04/22] Use write_to_online_store=True on ODFV so efficiency is precomputed in Redis The derived feature is now computed during materialization and stored in Redis, giving the same sub-ms latency as raw features at serving time. 
--- feast/feast_example.ipynb | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index 6ab5b43..8fa59e8 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -247,13 +247,15 @@ ")\n", "\n", "# ---------------------------------------------------------------------------\n", - "# On Demand Feature View: a derived feature computed by Feast at query time.\n", + "# On Demand Feature View: a derived feature that Feast computes for you.\n", "#\n", "# \"efficiency\" = conv_rate / acc_rate\n", "#\n", - "# This transformation runs automatically when you request this feature —\n", - "# both during training (get_historical_features) and serving\n", - "# (get_online_features). You define it once; Feast guarantees consistency.\n", + "# write_to_online_store=True means the transformation runs during\n", + "# materialization and the result is stored in Redis. At serving time,\n", + "# Feast reads the precomputed value — no on-the-fly computation needed.\n", + "# For training (get_historical_features), the transformation still runs\n", + "# inline over the offline data.\n", "# ---------------------------------------------------------------------------\n", "@on_demand_feature_view(\n", " sources=[driver_hourly_stats],\n", @@ -261,6 +263,7 @@ " Field(name=\"efficiency\", dtype=Float64),\n", " ],\n", " mode=\"pandas\",\n", + " write_to_online_store=True,\n", ")\n", "def driver_efficiency(features_df: pd.DataFrame) -> pd.DataFrame:\n", " df = pd.DataFrame()\n", @@ -396,11 +399,11 @@ "## 6. Materialize features to Redis\n", "\n", "Materialization copies the latest feature values from the offline store\n", - "(parquet) into Redis for low-latency online serving.\n", + "(parquet) into Redis for low-latency online serving. 
Because our ODFV uses\n", + "`write_to_online_store=True`, the derived `efficiency` feature is also\n", + "computed and stored in Redis during this step.\n", "\n", - "In production you would run this on a schedule (e.g. hourly cron job).\n", - "Note: only `FeatureView` data is materialized. ODFV transformations are\n", - "computed on-the-fly at query time.\n" + "In production you would run this on a schedule (e.g. hourly cron job).\n" ] }, { @@ -428,8 +431,8 @@ "## 7. Online feature serving\n", "\n", "Retrieve the latest feature values for specific drivers. This is what you\n", - "call at inference time. The raw features come from Redis; the `efficiency`\n", - "feature is computed on-the-fly by the ODFV — same formula as during training.\n" + "call at inference time. All values — including `efficiency` — come straight\n", + "from Redis (precomputed during materialization).\n" ] }, { @@ -483,14 +486,14 @@ "| Register | `store.apply([...])` | Write definitions to the registry (once per session) |\n", "| Train | `store.get_historical_features()` | Point-in-time join from parquet; ODFVs run inline |\n", "| Materialize | `store.materialize()` | Push latest raw values to Redis |\n", - "| Serve | `store.get_online_features()` | Sub-ms lookup from Redis; ODFVs run inline |\n", + "| Serve | `store.get_online_features()` | Sub-ms lookup from Redis (incl. precomputed ODFVs) |\n", "\n", "### FeatureView vs On Demand Feature View\n", "\n", "| | FeatureView | On Demand Feature View |\n", "|-|-------------|------------------------|\n", "| Data | Precomputed in your pipeline | Computed by Feast at query time |\n", - "| Materialized to Redis? | Yes | No (computed on-the-fly) |\n", + "| Materialized to Redis? 
| Yes | Yes (with `write_to_online_store=True`) or No (computed on-the-fly) |\n", "| Good for | Raw/heavy features | Lightweight derived features, request-time inputs |\n", "| Consistency | You ensure pipeline runs | Feast guarantees same logic in training & serving |\n", "\n", From e2ccba32e9480c832518a3db69d0bd2687290442 Mon Sep 17 00:00:00 2001 From: hsteude Date: Mon, 4 May 2026 15:49:19 +0200 Subject: [PATCH 05/22] Use default storage class instead of mayastor-no-redundancy --- feast/feast-cr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/feast/feast-cr.yaml b/feast/feast-cr.yaml index 7aa27fb..745909f 100644 --- a/feast/feast-cr.yaml +++ b/feast/feast-cr.yaml @@ -26,7 +26,7 @@ spec: pvc: mountPath: /data/registry create: - storageClassName: mayastor-no-redundancy # adjust for your cluster + # storageClassName: default # omit to use cluster default resources: requests: storage: 1Gi From 8ace6492c97f3dd34c79c08eead22bc183bd998d Mon Sep 17 00:00:00 2001 From: hsteude Date: Mon, 4 May 2026 15:57:08 +0200 Subject: [PATCH 06/22] Add Production Setup section to notebook Explains how the workflow changes in production: - Feature definitions in Git (auto-applied on pod start) - Registry Server exposed for remote clients - Notebooks use the operator-generated client ConfigMap - Materialization runs as a CronJob, not from notebooks --- feast/feast_example.ipynb | 91 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index 8fa59e8..37e65fd 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -504,6 +504,97 @@ "the \"Define & Register\" cells. 
Your feature *data* in Redis and on PVC is\n", "not affected.\n" ] + }, + { + "cell_type": "markdown", + "id": "production_setup", + "metadata": {}, + "source": [ + "---\n", + "## Production Setup\n", + "\n", + "This notebook uses a **local, interactive workflow** — you define features\n", + "inline, register them with `store.apply()`, and connect directly to Redis.\n", + "That's great for experimentation. In production, the architecture looks\n", + "different:\n", + "\n", + "### 1. Feature definitions live in Git\n", + "\n", + "Instead of defining features in a notebook, you put them in a `features.py`\n", + "file in a Git repository. The Feast Operator clones the repo on startup and\n", + "runs `feast apply` automatically:\n", + "\n", + "```yaml\n", + "# feast-cr.yaml (production)\n", + "spec:\n", + " feastProject: my_features\n", + " feastProjectDir:\n", + " git:\n", + " url: https://github.com/your-org/feast-feature-repo\n", + " ref: main # or pin to a commit SHA\n", + "```\n", + "\n", + "This means feature definitions are version-controlled, reviewed via PRs,\n", + "and automatically deployed when the pod starts.\n", + "\n", + "### 2. The Feast Server exposes a Registry Server\n", + "\n", + "Add `server: {}` to the registry config to expose it as a gRPC endpoint.\n", + "Notebooks and other clients can then read feature metadata remotely:\n", + "\n", + "```yaml\n", + "# feast-cr.yaml (production)\n", + "spec:\n", + " services:\n", + " registry:\n", + " local:\n", + " server: {} # exposes gRPC on port 6570\n", + " persistence:\n", + " file:\n", + " pvc:\n", + " mountPath: /data/registry\n", + " create: {}\n", + "```\n", + "\n", + "### 3. Notebooks use the remote client\n", + "\n", + "The Feast Operator creates a ConfigMap (`feast-<name>-client`) with the\n", + "client config. 
Instead of building `feature_store.yaml` manually, your\n", + "notebook just mounts it or copies it:\n", + "\n", + "```python\n", + "# Production notebook — no local registry, no direct Redis\n", + "store = FeatureStore(repo_path=\".\") # reads feature_store.yaml from ConfigMap\n", + "\n", + "# get_online_features goes through the Feast Server (HTTP)\n", + "# get_historical_features goes through the Offline Server (Arrow Flight)\n", + "# Feature metadata comes from the Registry Server (gRPC)\n", + "```\n", + "\n", + "### 4. Materialization runs as a CronJob\n", + "\n", + "Instead of running `store.materialize()` from a notebook, you set up a\n", + "Kubernetes CronJob that runs on a schedule (e.g. hourly). The Feast\n", + "Operator can manage this via the `batchEngine` config.\n", + "\n", + "### Architecture overview\n", + "\n", + "```\n", + " You (notebook) Feast Server Pod\n", + " ┌──────────────┐ ┌──────────────────────┐\n", + " │ FeatureStore │── online ──────▶│ Online Feature Server │──▶ Redis\n", + " │ (remote │── historical ──▶│ Offline Feature Server│──▶ Parquet/PVC\n", + " │ client) │── metadata ────▶│ Registry Server │──▶ SQLite/PVC\n", + " └──────────────┘ └──────────────────────┘\n", + " ▲\n", + " CronJob ── materialize ─────────────────┘\n", + " Git repo ── feast apply (on pod start) ─┘\n", + "```\n", + "\n", + "For the full production deployment guide, see the\n", + "[Feast Production Deployment Topologies](https://docs.feast.dev/how-to-guides/production-deployment-topologies)\n", + "documentation.\n" + ] } ], "metadata": { From 97cc4961cfe1864d8cd56df1b4044632f1f2eab0 Mon Sep 17 00:00:00 2001 From: hsteude Date: Mon, 4 May 2026 16:04:12 +0200 Subject: [PATCH 07/22] Rewrite example as retail return prediction scenario Replace abstract driver-stats example with a concrete retailer use case: - Entity: customer_id with order history features - FeatureView: total_orders, total_returns, avg_order_value, days_since_last_order - ODFV: return_rate and 
return_risk (derived, written to online store) - Preprocessing: filter new customers (< 3 orders), drop nulls, normalize - Model: LogisticRegression to predict return probability - Serving: flag high-risk orders for proactive customer service Also add prod-readiness disclaimer to README and update project name to retail_features. --- feast/README.md | 18 +- feast/feast-cr.yaml | 2 +- feast/feast_example.ipynb | 374 +++++++++++++++++++++++++++----------- feast/feature_store.yaml | 2 +- 4 files changed, 281 insertions(+), 115 deletions(-) diff --git a/feast/README.md b/feast/README.md index 5d0b0da..e7570ce 100644 --- a/feast/README.md +++ b/feast/README.md @@ -1,7 +1,17 @@ -# Feast Feature Store Examples +# Feast Feature Store Example -These examples demonstrate how to use [Feast](https://docs.feast.dev/) on prokube -for feature management in ML workflows. +A complete example of using [Feast](https://docs.feast.dev/) on prokube for +feature management in ML workflows. + +**Scenario:** An online retailer wants to predict whether a customer will +return their next order. The notebook walks through defining customer features, +training a return-risk model, and serving predictions in real time. + +> **Note:** This example uses SQLite (registry), single-replica Redis (online +> store), and local parquet files (offline store). This is fine for development +> and experimentation. For production, use PostgreSQL-backed registry, Redis +> Cluster, and a proper data warehouse. See the "Production Setup" section in +> the notebook for details. ## Prerequisites @@ -64,7 +74,7 @@ Open `feast_example.ipynb` in your Kubeflow notebook. 
The notebook reads the | `redis-cr.yaml` | Kubernetes manifest — deploys a Redis instance (OpsTree operator) | | `feast-cr.yaml` | Kubernetes manifest — deploys the FeatureStore CR | | `feature_store.yaml` | Feast SDK config template — the notebook generates this automatically | -| `feast_example.ipynb` | End-to-end notebook: define features, register, train, materialize, serve | +| `feast_example.ipynb` | End-to-end notebook: retail return prediction with Feast | ### Why two YAML files? diff --git a/feast/feast-cr.yaml b/feast/feast-cr.yaml index 745909f..fcd044d 100644 --- a/feast/feast-cr.yaml +++ b/feast/feast-cr.yaml @@ -14,7 +14,7 @@ metadata: name: my-store namespace: # <-- change this spec: - feastProject: my_features + feastProject: retail_features services: runFeastApplyOnInit: false securityContext: diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index 37e65fd..0ca7854 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -7,15 +7,20 @@ "source": [ "# Feast Feature Store on prokube\n", "\n", + "**Scenario:** You work at an online retailer. Your team wants to predict\n", + "whether a customer will return their next order. To do that, you need\n", + "customer-level features (order history, return rates, spending patterns)\n", + "available for both model training and real-time inference.\n", + "\n", "This notebook walks through the full Feast workflow:\n", "\n", "1. **Setup** — install dependencies, configure the Feast client\n", - "2. **Generate data** — create sample driver statistics\n", + "2. **Generate data** — simulate customer order history\n", "3. **Define features** — entities, feature views, and on-demand transformations\n", "4. **Register** — push definitions to the Feast registry\n", - "5. **Train** — retrieve historical features with point-in-time correctness\n", + "5. **Train** — retrieve historical features, preprocess, train a return predictor\n", "6. 
**Materialize** — push latest values to Redis for online serving\n", - "7. **Serve** — retrieve features at inference time\n", + "7. **Serve** — predict return risk for incoming orders in real time\n", "\n", "Everything happens inline in this notebook — no external Python files or CLI\n", "commands needed.\n", @@ -25,7 +30,11 @@ "Before running this notebook, make sure you have deployed:\n", "- A **Redis** instance in your namespace (`redis-cr.yaml`)\n", "- A **FeatureStore** CR in your namespace (`feast-cr.yaml`)\n", - "- The **feast-redis-config** secret (see `README.md` for setup steps)\n" + "- The **feast-redis-config** secret (see `README.md` for setup steps)\n", + "\n", + "> **Note:** This setup (SQLite registry, single-replica Redis, local parquet\n", + "> files) is for **development and experimentation**. For a production-ready\n", + "> deployment, see the [Production Setup](#Production-Setup) section at the end.\n" ] }, { @@ -102,7 +111,7 @@ "\n", "NAMESPACE = get_namespace()\n", "REDIS_CONNECTION_STRING = get_redis_connection_string()\n", - "FEAST_PROJECT = \"my_features\"\n", + "FEAST_PROJECT = \"retail_features\"\n", "\n", "feature_store_yaml = (\n", " f\"project: {FEAST_PROJECT}\\n\"\n", @@ -136,8 +145,20 @@ "---\n", "## 2. Generate sample data\n", "\n", - "We create a parquet file simulating hourly driver statistics over the past\n", - "7 days. In a real project this would come from your data pipeline.\n" + "We simulate a customer order history table — the kind of data your data\n", + "pipeline would produce daily. 
Each row represents the aggregated stats for\n", + "one customer at one point in time:\n", + "\n", + "| Column | Meaning |\n", + "|--------|--------|\n", + "| `customer_id` | Unique customer identifier |\n", + "| `total_orders` | Total number of orders placed |\n", + "| `total_returns` | Total number of returned orders |\n", + "| `avg_order_value` | Average order value in EUR |\n", + "| `days_since_last_order` | Days since the customer's last order |\n", + "| `returned` | Did the customer return their most recent order? (label) |\n", + "\n", + "In a real project, this would come from your data warehouse or ETL pipeline.\n" ] }, { @@ -154,23 +175,47 @@ "import pandas as pd\n", "\n", "np.random.seed(42)\n", - "n = 1000\n", + "n_customers = 200\n", + "n_snapshots = 10 # 10 daily snapshots per customer\n", + "n = n_customers * n_snapshots\n", "now = datetime.datetime.now()\n", - "timestamps = [now - datetime.timedelta(hours=i) for i in range(n)]\n", "\n", - "driver_df = pd.DataFrame({\n", - " \"driver_id\": np.random.choice([1001, 1002, 1003, 1004, 1005], n),\n", + "customer_ids = np.repeat(np.arange(1, n_customers + 1), n_snapshots)\n", + "timestamps = []\n", + "for _ in range(n_customers):\n", + " timestamps.extend([now - datetime.timedelta(days=i) for i in range(n_snapshots)])\n", + "\n", + "# Simulate realistic customer stats\n", + "total_orders = np.random.randint(1, 80, n).astype(np.int64)\n", + "total_returns = np.array([\n", + " np.random.binomial(orders, np.random.uniform(0.05, 0.4))\n", + " for orders in total_orders\n", + "]).astype(np.int64)\n", + "avg_order_value = np.random.uniform(15.0, 250.0, n).astype(np.float32)\n", + "days_since_last_order = np.random.randint(0, 90, n).astype(np.int64)\n", + "\n", + "# Label: customers with high return rates and high order values are more\n", + "# likely to return. 
Add noise to keep it realistic.\n", + "return_rate = total_returns / np.maximum(total_orders, 1)\n", + "return_prob = 0.3 * return_rate + 0.002 * avg_order_value / 250.0 + np.random.normal(0, 0.1, n)\n", + "returned = (return_prob > 0.15).astype(np.int64)\n", + "\n", + "customer_df = pd.DataFrame({\n", + " \"customer_id\": customer_ids,\n", " \"event_timestamp\": timestamps,\n", - " \"conv_rate\": np.random.uniform(0.1, 1.0, n).astype(np.float32),\n", - " \"acc_rate\": np.random.uniform(0.5, 1.0, n).astype(np.float32),\n", - " \"avg_daily_trips\": np.random.randint(1, 50, n).astype(np.int64),\n", + " \"total_orders\": total_orders,\n", + " \"total_returns\": total_returns,\n", + " \"avg_order_value\": avg_order_value,\n", + " \"days_since_last_order\": days_since_last_order,\n", + " \"returned\": returned,\n", " \"created\": timestamps,\n", "})\n", "\n", "os.makedirs(\"data\", exist_ok=True)\n", - "driver_df.to_parquet(\"data/driver_stats.parquet\")\n", - "print(f\"Created {n} rows for {driver_df['driver_id'].nunique()} drivers\")\n", - "driver_df.head()\n" + "customer_df.to_parquet(\"data/customer_orders.parquet\")\n", + "print(f\"Created {n} rows for {n_customers} customers ({n_snapshots} snapshots each)\")\n", + "print(f\"Return rate in dataset: {returned.mean():.1%}\")\n", + "customer_df.head(10)\n" ] }, { @@ -206,68 +251,75 @@ "source": [ "from datetime import timedelta\n", "\n", - "from feast import Entity, FeatureStore, FeatureView, Field, FileSource, RequestSource, ValueType\n", + "from feast import Entity, FeatureStore, FeatureView, Field, FileSource, ValueType\n", "from feast.on_demand_feature_view import on_demand_feature_view\n", "from feast.types import Float32, Float64, Int64\n", "\n", "# ---------------------------------------------------------------------------\n", "# Entity: the \"primary key\" for feature lookups.\n", - "# When you request features, you provide entity values (e.g. 
driver_id=1001).\n", + "# When you request features, you provide a customer_id.\n", "# ---------------------------------------------------------------------------\n", - "driver = Entity(\n", - " name=\"driver_id\",\n", + "customer = Entity(\n", + " name=\"customer_id\",\n", " value_type=ValueType.INT64,\n", - " description=\"Unique driver identifier\",\n", + " description=\"Unique customer identifier\",\n", ")\n", "\n", "# ---------------------------------------------------------------------------\n", - "# Data source: points to where historical data lives.\n", + "# Data source: points to the parquet file produced by the data pipeline.\n", "# ---------------------------------------------------------------------------\n", - "driver_stats_source = FileSource(\n", - " path=\"data/driver_stats.parquet\",\n", + "customer_orders_source = FileSource(\n", + " path=\"data/customer_orders.parquet\",\n", " timestamp_field=\"event_timestamp\",\n", " created_timestamp_column=\"created\",\n", ")\n", "\n", "# ---------------------------------------------------------------------------\n", "# FeatureView: declares which columns from the source are features.\n", - "# These are raw/precomputed values — Feast just stores and serves them.\n", + "# These are raw/precomputed values from your data pipeline.\n", + "#\n", + "# Note: \"returned\" is our label, not a feature. 
We include it in the source\n", + "# data but won't register it as a feature — we'll query it separately for\n", + "# training.\n", "# ---------------------------------------------------------------------------\n", - "driver_hourly_stats = FeatureView(\n", - " name=\"driver_hourly_stats\",\n", - " entities=[driver],\n", - " ttl=timedelta(days=7),\n", + "customer_order_stats = FeatureView(\n", + " name=\"customer_order_stats\",\n", + " entities=[customer],\n", + " ttl=timedelta(days=30),\n", " schema=[\n", - " Field(name=\"conv_rate\", dtype=Float32),\n", - " Field(name=\"acc_rate\", dtype=Float32),\n", - " Field(name=\"avg_daily_trips\", dtype=Int64),\n", + " Field(name=\"total_orders\", dtype=Int64),\n", + " Field(name=\"total_returns\", dtype=Int64),\n", + " Field(name=\"avg_order_value\", dtype=Float32),\n", + " Field(name=\"days_since_last_order\", dtype=Int64),\n", " ],\n", - " source=driver_stats_source,\n", + " source=customer_orders_source,\n", " online=True,\n", ")\n", "\n", "# ---------------------------------------------------------------------------\n", - "# On Demand Feature View: a derived feature that Feast computes for you.\n", + "# On Demand Feature View: derived features computed by Feast.\n", "#\n", - "# \"efficiency\" = conv_rate / acc_rate\n", + "# return_rate: what fraction of orders did this customer return?\n", + "# return_risk: return_rate * avg_order_value — a simple risk score.\n", + "# High return rate + expensive orders = high financial risk.\n", "#\n", - "# write_to_online_store=True means the transformation runs during\n", - "# materialization and the result is stored in Redis. At serving time,\n", - "# Feast reads the precomputed value — no on-the-fly computation needed.\n", - "# For training (get_historical_features), the transformation still runs\n", - "# inline over the offline data.\n", + "# write_to_online_store=True means these are precomputed during\n", + "# materialization and stored in Redis. 
No on-the-fly computation at\n", + "# serving time.\n", "# ---------------------------------------------------------------------------\n", "@on_demand_feature_view(\n", - " sources=[driver_hourly_stats],\n", + " sources=[customer_order_stats],\n", " schema=[\n", - " Field(name=\"efficiency\", dtype=Float64),\n", + " Field(name=\"return_rate\", dtype=Float64),\n", + " Field(name=\"return_risk\", dtype=Float64),\n", " ],\n", " mode=\"pandas\",\n", " write_to_online_store=True,\n", ")\n", - "def driver_efficiency(features_df: pd.DataFrame) -> pd.DataFrame:\n", + "def customer_risk_features(features_df: pd.DataFrame) -> pd.DataFrame:\n", " df = pd.DataFrame()\n", - " df[\"efficiency\"] = features_df[\"conv_rate\"] / features_df[\"acc_rate\"]\n", + " df[\"return_rate\"] = features_df[\"total_returns\"] / features_df[\"total_orders\"].clip(lower=1)\n", + " df[\"return_risk\"] = df[\"return_rate\"] * features_df[\"avg_order_value\"]\n", " return df\n", "\n", "\n", @@ -297,10 +349,10 @@ "store = FeatureStore(repo_path=\".\")\n", "\n", "store.apply([\n", - " driver,\n", - " driver_stats_source,\n", - " driver_hourly_stats,\n", - " driver_efficiency,\n", + " customer,\n", + " customer_orders_source,\n", + " customer_order_stats,\n", + " customer_risk_features,\n", "])\n", "\n", "print(\"Registered:\")\n", @@ -316,15 +368,15 @@ "metadata": {}, "source": [ "---\n", - "## 5. Retrieve historical features for training\n", + "## 5. Retrieve historical features and train a model\n", "\n", - "`get_historical_features()` performs a **point-in-time join**: for each entity\n", - "row, it finds the most recent feature values *as of that timestamp*. 
This\n", - "prevents data leakage — you only see features that were available when the\n", - "event occurred.\n", + "`get_historical_features()` performs a **point-in-time join**: for each\n", + "customer, it finds the most recent feature values *as of that timestamp*.\n", + "This prevents data leakage — you only see features that were available when\n", + "the event occurred.\n", "\n", - "Note how we request both raw features (`driver_hourly_stats:conv_rate`) and\n", - "the derived feature (`driver_efficiency:efficiency`). The ODFV runs\n", + "Note how we request both raw features (`customer_order_stats:total_orders`)\n", + "and derived features (`customer_risk_features:return_rate`). The ODFV runs\n", "automatically — no extra code needed.\n", "\n", "> **Note:** Feast will show a `RuntimeWarning` that on-demand feature views\n", @@ -341,34 +393,104 @@ "metadata": {}, "outputs": [], "source": [ - "# Build a query: \"give me features for these drivers, as of this point in time.\"\n", - "# Each row says: I want to know the feature values for driver X at time T.\n", + "# Build a query: \"give me features for these customers, as of right now.\"\n", + "# Each row says: I want to know the feature values for customer X at time T.\n", "# Feast will find the most recent feature values that were available at that\n", "# timestamp — this is the \"point-in-time join\" that prevents data leakage.\n", "\n", - "drivers_to_query = [1001, 1002, 1003, 1004, 1005]\n", + "all_customer_ids = list(range(1, n_customers + 1))\n", "query_timestamp = now # \"as of right now\"\n", "\n", "entity_df = pd.DataFrame({\n", - " \"driver_id\": drivers_to_query,\n", - " \"event_timestamp\": [query_timestamp] * len(drivers_to_query),\n", + " \"customer_id\": all_customer_ids,\n", + " \"event_timestamp\": [query_timestamp] * len(all_customer_ids),\n", "})\n", "\n", - "print(\"Query: get features for these drivers as of this timestamp:\")\n", - "print(entity_df.to_string(index=False))\n", + 
"print(f\"Querying features for {len(all_customer_ids)} customers...\")\n", "\n", "training_df = store.get_historical_features(\n", " entity_df=entity_df,\n", " features=[\n", - " \"driver_hourly_stats:conv_rate\",\n", - " \"driver_hourly_stats:acc_rate\",\n", - " \"driver_hourly_stats:avg_daily_trips\",\n", - " \"driver_efficiency:efficiency\", # <-- computed by Feast\n", + " \"customer_order_stats:total_orders\",\n", + " \"customer_order_stats:total_returns\",\n", + " \"customer_order_stats:avg_order_value\",\n", + " \"customer_order_stats:days_since_last_order\",\n", + " \"customer_risk_features:return_rate\", # <-- computed by Feast\n", + " \"customer_risk_features:return_risk\", # <-- computed by Feast\n", " ],\n", ").to_df()\n", "\n", - "print(\"Training data (point-in-time correct, incl. derived features):\")\n", - "training_df\n" + "print(f\"Retrieved {len(training_df)} rows.\")\n", + "training_df.head(10)\n" + ] + }, + { + "cell_type": "markdown", + "id": "preprocess_header", + "metadata": {}, + "source": [ + "### Preprocessing\n", + "\n", + "Before training, we need to clean up the data:\n", + "\n", + "1. **Join the label** — `returned` is not a Feast feature (it's our\n", + " prediction target). We join it from the original data.\n", + "2. **Filter out new customers** — customers with fewer than 3 orders don't\n", + " have enough history for reliable features. Feeding them into the model\n", + " would add noise.\n", + "3. **Drop nulls** — any rows where Feast couldn't find matching features.\n", + "4. **Normalize** — scale numeric features so the model doesn't overweight\n", + " high-magnitude columns like `avg_order_value`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "preprocess", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "# --- 1. 
Join the label from the source data ---\n", + "# Get the most recent snapshot per customer to use as label\n", + "latest_labels = (\n", + " customer_df\n", + " .sort_values(\"event_timestamp\")\n", + " .groupby(\"customer_id\")\n", + " .last()[[\"returned\"]]\n", + " .reset_index()\n", + ")\n", + "training_df = training_df.merge(latest_labels, on=\"customer_id\", how=\"left\")\n", + "\n", + "print(f\"After joining labels: {len(training_df)} rows\")\n", + "\n", + "# --- 2. Filter out new customers (< 3 orders) ---\n", + "before = len(training_df)\n", + "training_df = training_df[training_df[\"total_orders\"] >= 3].copy()\n", + "print(f\"After filtering new customers (< 3 orders): {len(training_df)} rows (dropped {before - len(training_df)})\")\n", + "\n", + "# --- 3. Drop nulls ---\n", + "before = len(training_df)\n", + "training_df = training_df.dropna()\n", + "print(f\"After dropping nulls: {len(training_df)} rows (dropped {before - len(training_df)})\")\n", + "\n", + "# --- 4. Normalize features ---\n", + "FEATURE_COLS = [\"total_orders\", \"total_returns\", \"avg_order_value\",\n", + " \"days_since_last_order\", \"return_rate\", \"return_risk\"]\n", + "TARGET = \"returned\"\n", + "\n", + "scaler = StandardScaler()\n", + "X = pd.DataFrame(\n", + " scaler.fit_transform(training_df[FEATURE_COLS]),\n", + " columns=FEATURE_COLS,\n", + " index=training_df.index,\n", + ")\n", + "y = training_df[TARGET]\n", + "\n", + "print(f\"\\nTraining set: {len(X)} samples, {len(FEATURE_COLS)} features\")\n", + "print(f\"Class balance: {y.mean():.1%} returns\")\n", + "X.describe().round(2)\n" ] }, { @@ -378,16 +500,19 @@ "metadata": {}, "outputs": [], "source": [ - "from sklearn.linear_model import LinearRegression\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.model_selection import train_test_split\n", "\n", - "FEATURE_COLS = [\"acc_rate\", \"avg_daily_trips\", \"efficiency\"]\n", - "TARGET = 
\"conv_rate\"\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.2, random_state=42, stratify=y\n", + ")\n", "\n", - "X = training_df[FEATURE_COLS].fillna(0)\n", - "y = training_df[TARGET].fillna(0)\n", + "model = LogisticRegression(random_state=42, max_iter=1000)\n", + "model.fit(X_train, y_train)\n", "\n", - "model = LinearRegression().fit(X, y)\n", - "print(f\"Model trained on {len(X)} samples.\")\n" + "y_pred = model.predict(X_test)\n", + "print(classification_report(y_test, y_pred, target_names=[\"kept\", \"returned\"]))\n" ] }, { @@ -400,10 +525,11 @@ "\n", "Materialization copies the latest feature values from the offline store\n", "(parquet) into Redis for low-latency online serving. Because our ODFV uses\n", - "`write_to_online_store=True`, the derived `efficiency` feature is also\n", - "computed and stored in Redis during this step.\n", + "`write_to_online_store=True`, the derived `return_rate` and `return_risk`\n", + "features are also computed and stored in Redis during this step.\n", "\n", - "In production you would run this on a schedule (e.g. hourly cron job).\n" + "In production you would run this on a schedule (e.g. daily after your\n", + "pipeline updates the parquet files).\n" ] }, { @@ -416,7 +542,7 @@ "from datetime import datetime, timedelta\n", "\n", "store.materialize(\n", - " start_date=datetime.now() - timedelta(days=7),\n", + " start_date=datetime.now() - timedelta(days=30),\n", " end_date=datetime.now(),\n", ")\n", "print(\"Materialized to Redis.\")\n" @@ -428,11 +554,14 @@ "metadata": {}, "source": [ "---\n", - "## 7. Online feature serving\n", + "## 7. Online feature serving — predict return risk\n", + "\n", + "A customer just placed an order. Your order service calls Feast to get\n", + "their features, feeds them into the model, and decides whether to flag\n", + "the order for proactive customer service.\n", "\n", - "Retrieve the latest feature values for specific drivers. 
This is what you\n", - "call at inference time. All values — including `efficiency` — come straight\n", - "from Redis (precomputed during materialization).\n" + "All values — including the derived `return_rate` and `return_risk` —\n", + "come straight from Redis (precomputed during materialization).\n" ] }, { @@ -442,19 +571,24 @@ "metadata": {}, "outputs": [], "source": [ + "# Simulate: these 3 customers just placed a new order\n", + "customers_with_new_orders = [{\"customer_id\": 5}, {\"customer_id\": 42}, {\"customer_id\": 137}]\n", + "\n", "online_features = store.get_online_features(\n", " features=[\n", - " \"driver_hourly_stats:conv_rate\",\n", - " \"driver_hourly_stats:acc_rate\",\n", - " \"driver_hourly_stats:avg_daily_trips\",\n", - " \"driver_efficiency:efficiency\",\n", + " \"customer_order_stats:total_orders\",\n", + " \"customer_order_stats:total_returns\",\n", + " \"customer_order_stats:avg_order_value\",\n", + " \"customer_order_stats:days_since_last_order\",\n", + " \"customer_risk_features:return_rate\",\n", + " \"customer_risk_features:return_risk\",\n", " ],\n", - " entity_rows=[{\"driver_id\": 1001}, {\"driver_id\": 1002}],\n", + " entity_rows=customers_with_new_orders,\n", ").to_dict()\n", "\n", - "print(\"Online features (from Redis + ODFV):\")\n", - "for k, v in online_features.items():\n", - " print(f\" {k}: {v}\")\n" + "print(\"Online features (from Redis):\")\n", + "online_df = pd.DataFrame(online_features)\n", + "online_df\n" ] }, { @@ -464,12 +598,20 @@ "metadata": {}, "outputs": [], "source": [ - "# Use online features for inference\n", - "inference_df = pd.DataFrame(online_features)\n", - "predictions = model.predict(inference_df[FEATURE_COLS])\n", + "# Run inference: predict return probability and flag high-risk orders\n", + "X_inference = pd.DataFrame(\n", + " scaler.transform(online_df[FEATURE_COLS]),\n", + " columns=FEATURE_COLS,\n", + ")\n", + "\n", + "return_probabilities = model.predict_proba(X_inference)[:, 1]\n", + "\n", + 
"RISK_THRESHOLD = 0.5\n", "\n", - "for driver_id, pred in zip(inference_df[\"driver_id\"], predictions):\n", - " print(f\"Driver {driver_id}: predicted conv_rate = {pred:.4f}\")\n" + "print(\"\\n--- Return Risk Assessment ---\")\n", + "for cid, prob in zip(online_df[\"customer_id\"], return_probabilities):\n", + " flag = \"HIGH RISK\" if prob > RISK_THRESHOLD else \"low risk\"\n", + " print(f\" Customer {cid}: return probability = {prob:.1%} [{flag}]\")\n" ] }, { @@ -485,8 +627,9 @@ "| Define | Python objects (Entity, FeatureView, ODFV) | Declare what features exist and how derived ones are computed |\n", "| Register | `store.apply([...])` | Write definitions to the registry (once per session) |\n", "| Train | `store.get_historical_features()` | Point-in-time join from parquet; ODFVs run inline |\n", - "| Materialize | `store.materialize()` | Push latest raw values to Redis |\n", - "| Serve | `store.get_online_features()` | Sub-ms lookup from Redis (incl. precomputed ODFVs) |\n", + "| Preprocess | pandas / sklearn | Filter, clean, normalize before training |\n", + "| Materialize | `store.materialize()` | Push latest raw + derived values to Redis |\n", + "| Serve | `store.get_online_features()` | Sub-ms lookup from Redis |\n", "\n", "### FeatureView vs On Demand Feature View\n", "\n", @@ -527,7 +670,7 @@ "```yaml\n", "# feast-cr.yaml (production)\n", "spec:\n", - " feastProject: my_features\n", + " feastProject: retail_features\n", " feastProjectDir:\n", " git:\n", " url: https://github.com/your-org/feast-feature-repo\n", @@ -550,10 +693,10 @@ " local:\n", " server: {} # exposes gRPC on port 6570\n", " persistence:\n", - " file:\n", - " pvc:\n", - " mountPath: /data/registry\n", - " create: {}\n", + " store:\n", + " type: sql\n", + " secretRef:\n", + " name: feast-registry-db # PostgreSQL for production\n", "```\n", "\n", "### 3. Notebooks use the remote client\n", @@ -574,17 +717,30 @@ "### 4. 
Materialization runs as a CronJob\n", "\n", "Instead of running `store.materialize()` from a notebook, you set up a\n", - "Kubernetes CronJob that runs on a schedule (e.g. hourly). The Feast\n", - "Operator can manage this via the `batchEngine` config.\n", + "Kubernetes CronJob that runs on a schedule (e.g. daily after your ETL\n", + "pipeline refreshes the customer data). The Feast Operator can manage this\n", + "via the `batchEngine` config.\n", + "\n", + "### 5. Use production-grade storage\n", + "\n", + "This notebook uses SQLite (registry) and single-replica Redis (online\n", + "store). For production:\n", + "\n", + "| Component | Dev (this notebook) | Production |\n", + "|-----------|--------------------|-----------|\n", + "| Registry | SQLite on `/tmp` | PostgreSQL (SQL-backed) |\n", + "| Online Store | Redis (1 replica) | Redis Cluster or managed Redis |\n", + "| Offline Store | Parquet on PVC | PostgreSQL, BigQuery, Redshift, etc. |\n", + "| Feast Server | 1 replica | Multi-replica with HPA autoscaling |\n", "\n", "### Architecture overview\n", "\n", "```\n", " You (notebook) Feast Server Pod\n", " ┌──────────────┐ ┌──────────────────────┐\n", - " │ FeatureStore │── online ──────▶│ Online Feature Server │──▶ Redis\n", - " │ (remote │── historical ──▶│ Offline Feature Server│──▶ Parquet/PVC\n", - " │ client) │── metadata ────▶│ Registry Server │──▶ SQLite/PVC\n", + " │ FeatureStore │── online ──────▶│ Online Feature Server │──▶ Redis Cluster\n", + " │ (remote │── historical ──▶│ Offline Feature Server│──▶ PostgreSQL\n", + " │ client) │── metadata ────▶│ Registry Server │──▶ PostgreSQL\n", " └──────────────┘ └──────────────────────┘\n", " ▲\n", " CronJob ── materialize ─────────────────┘\n", diff --git a/feast/feature_store.yaml b/feast/feature_store.yaml index 0d6b181..cb0d1d5 100644 --- a/feast/feature_store.yaml +++ b/feast/feature_store.yaml @@ -4,7 +4,7 @@ # For workflows running inside the cluster, you can use /tmp/registry.db # as the registry path 
(ephemeral, single-run). For persistent registry # access, mount the registry PVC and use /data/registry/registry.db. -project: my_features +project: retail_features provider: local offline_store: type: file From 9bae4769ef2ab32b77857473b9ecff60aefb8e30 Mon Sep 17 00:00:00 2001 From: hsteude Date: Mon, 4 May 2026 16:13:04 +0200 Subject: [PATCH 08/22] Make prerequisites more prominent: link to README setup steps --- feast/feast_example.ipynb | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index 0ca7854..21303ae 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -27,10 +27,16 @@ "\n", "### Prerequisites\n", "\n", - "Before running this notebook, make sure you have deployed:\n", - "- A **Redis** instance in your namespace (`redis-cr.yaml`)\n", - "- A **FeatureStore** CR in your namespace (`feast-cr.yaml`)\n", - "- The **feast-redis-config** secret (see `README.md` for setup steps)\n", + "**Before running this notebook**, follow the setup steps in\n", + "[`README.md`](README.md) first. You need to create:\n", + "\n", + "1. The **Redis password secret** (`redis-feast`)\n", + "2. The **Redis instance** (`redis-cr.yaml`) — wait until the pod is Running\n", + "3. The **Feast Redis secret** (`feast-redis-config`)\n", + "4. The **FeatureStore CR** (`feast-cr.yaml`) — wait until Ready\n", + "\n", + "If any of these are missing, the notebook will fail at the \"Configure the\n", + "Feast client\" step.\n", "\n", "> **Note:** This setup (SQLite registry, single-replica Redis, local parquet\n", "> files) is for **development and experimentation**. 
For a production-ready\n", From 6c02a1defe84c66d7bafc0a90fd9d1a3efd04091 Mon Sep 17 00:00:00 2001 From: hsteude Date: Mon, 4 May 2026 16:46:03 +0200 Subject: [PATCH 09/22] Clarify why 'returned' label is not a Feast feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At serving time we don't know whether the customer will return — that's what the model predicts. Labels come from a separate source and are only joined during training. --- feast/feast_example.ipynb | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index 21303ae..a657edd 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -284,9 +284,10 @@ "# FeatureView: declares which columns from the source are features.\n", "# These are raw/precomputed values from your data pipeline.\n", "#\n", - "# Note: \"returned\" is our label, not a feature. We include it in the source\n", - "# data but won't register it as a feature — we'll query it separately for\n", - "# training.\n", + "# Note: the source data also contains \"returned\" (did the customer return\n", + "# their last order?). We don't include it here because it's our prediction\n", + "# target — at serving time, we don't know the answer yet. Labels live\n", + "# outside Feast and are joined only during training.\n", "# ---------------------------------------------------------------------------\n", "customer_order_stats = FeatureView(\n", " name=\"customer_order_stats\",\n", @@ -439,8 +440,12 @@ "\n", "Before training, we need to clean up the data:\n", "\n", - "1. **Join the label** — `returned` is not a Feast feature (it's our\n", - " prediction target). We join it from the original data.\n", + "1. 
**Join the label** — `returned` is our prediction target, not a feature.\n", + " We deliberately keep it out of Feast because at serving time (when a\n", + " customer places a new order) we don't know yet whether they will return\n", + " it — that's what the model predicts. Labels typically come from a\n", + " separate source (e.g. your data warehouse) and are joined only for\n", + " training.\n", "2. **Filter out new customers** — customers with fewer than 3 orders don't\n", " have enough history for reliable features. Feeding them into the model\n", " would add noise.\n", From b78c2e4ca276e891e2d5e868fe71f02903b45905 Mon Sep 17 00:00:00 2001 From: hsteude Date: Mon, 4 May 2026 16:50:33 +0200 Subject: [PATCH 10/22] =?UTF-8?q?Remove=20write=5Fto=5Fonline=5Fstore=3DTr?= =?UTF-8?q?ue=20=E2=80=94=20ODFV=20materialization=20fails=20on=20Feast=20?= =?UTF-8?q?0.63?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ODFV materialization to Redis hits a serialization bug with the entity key. Use on-the-fly computation instead, which works reliably and is fast enough for simple transformations like return_rate. --- feast/feast_example.ipynb | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index a657edd..940fe0b 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -310,9 +310,9 @@ "# return_risk: return_rate * avg_order_value — a simple risk score.\n", "# High return rate + expensive orders = high financial risk.\n", "#\n", - "# write_to_online_store=True means these are precomputed during\n", - "# materialization and stored in Redis. No on-the-fly computation at\n", - "# serving time.\n", + "# These are computed on-the-fly during get_historical_features() and\n", + "# get_online_features(). 
You define the formula once; Feast guarantees\n", + "# the same logic runs in training and serving.\n", "# ---------------------------------------------------------------------------\n", "@on_demand_feature_view(\n", " sources=[customer_order_stats],\n", @@ -321,7 +321,6 @@ " Field(name=\"return_risk\", dtype=Float64),\n", " ],\n", " mode=\"pandas\",\n", - " write_to_online_store=True,\n", ")\n", "def customer_risk_features(features_df: pd.DataFrame) -> pd.DataFrame:\n", " df = pd.DataFrame()\n", @@ -535,9 +534,12 @@ "## 6. Materialize features to Redis\n", "\n", "Materialization copies the latest feature values from the offline store\n", - "(parquet) into Redis for low-latency online serving. Because our ODFV uses\n", - "`write_to_online_store=True`, the derived `return_rate` and `return_risk`\n", - "features are also computed and stored in Redis during this step.\n", + "(parquet) into Redis for low-latency online serving.\n", + "\n", + "Note: only `FeatureView` data is materialized to Redis. The ODFV features\n", + "(`return_rate`, `return_risk`) are computed on-the-fly when you call\n", + "`get_online_features()` — Feast reads the raw values from Redis and applies\n", + "the transformation inline.\n", "\n", "In production you would run this on a schedule (e.g. daily after your\n", "pipeline updates the parquet files).\n" @@ -571,8 +573,9 @@ "their features, feeds them into the model, and decides whether to flag\n", "the order for proactive customer service.\n", "\n", - "All values — including the derived `return_rate` and `return_risk` —\n", - "come straight from Redis (precomputed during materialization).\n" + "The raw features (`total_orders`, etc.) come from Redis. 
The derived\n", + "features (`return_rate`, `return_risk`) are computed on-the-fly by the\n", + "ODFV — same formula as during training.\n" ] }, { @@ -639,15 +642,15 @@ "| Register | `store.apply([...])` | Write definitions to the registry (once per session) |\n", "| Train | `store.get_historical_features()` | Point-in-time join from parquet; ODFVs run inline |\n", "| Preprocess | pandas / sklearn | Filter, clean, normalize before training |\n", - "| Materialize | `store.materialize()` | Push latest raw + derived values to Redis |\n", - "| Serve | `store.get_online_features()` | Sub-ms lookup from Redis |\n", + "| Materialize | `store.materialize()` | Push latest raw values to Redis |\n", + "| Serve | `store.get_online_features()` | Sub-ms lookup from Redis + ODFV computed inline |\n", "\n", "### FeatureView vs On Demand Feature View\n", "\n", "| | FeatureView | On Demand Feature View |\n", "|-|-------------|------------------------|\n", "| Data | Precomputed in your pipeline | Computed by Feast at query time |\n", - "| Materialized to Redis? | Yes | Yes (with `write_to_online_store=True`) or No (computed on-the-fly) |\n", + "| Materialized to Redis? | Yes | No — computed on-the-fly at query time |\n", "| Good for | Raw/heavy features | Lightweight derived features, request-time inputs |\n", "| Consistency | You ensure pipeline runs | Feast guarantees same logic in training & serving |\n", "\n", From a6c9bc3637c25222f40d1c6981f9ba8a823b5547 Mon Sep 17 00:00:00 2001 From: hsteude Date: Mon, 4 May 2026 16:55:19 +0200 Subject: [PATCH 11/22] Fix prod recommendations: Redis and parquet are prod-ready on prokube Only the SQLite registry on /tmp is not production-ready. Redis is managed by the OpsTree operator (prokube provides this), and parquet on PVC or S3/MinIO is fine for the offline store. 
--- feast/README.md | 9 ++++----- feast/feast_example.ipynb | 23 ++++++++++++----------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/feast/README.md b/feast/README.md index e7570ce..2636837 100644 --- a/feast/README.md +++ b/feast/README.md @@ -7,11 +7,10 @@ feature management in ML workflows. return their next order. The notebook walks through defining customer features, training a return-risk model, and serving predictions in real time. -> **Note:** This example uses SQLite (registry), single-replica Redis (online -> store), and local parquet files (offline store). This is fine for development -> and experimentation. For production, use PostgreSQL-backed registry, Redis -> Cluster, and a proper data warehouse. See the "Production Setup" section in -> the notebook for details. +> **Note:** This example uses a SQLite registry on `/tmp` which does not survive +> pod restarts. Redis (managed by the OpsTree operator) and the parquet offline +> store are production-ready. For a persistent registry, switch to PostgreSQL — +> see the "Production Setup" section in the notebook. ## Prerequisites diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index 940fe0b..2255477 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -38,9 +38,9 @@ "If any of these are missing, the notebook will fail at the \"Configure the\n", "Feast client\" step.\n", "\n", - "> **Note:** This setup (SQLite registry, single-replica Redis, local parquet\n", - "> files) is for **development and experimentation**. For a production-ready\n", - "> deployment, see the [Production Setup](#Production-Setup) section at the end.\n" + "> **Note:** This setup uses a SQLite registry on `/tmp` which does not\n", + "> survive pod restarts. For a production-ready deployment, see the\n", + "> [Production Setup](#Production-Setup) section at the end.\n" ] }, { @@ -737,14 +737,15 @@ "\n", "### 5. 
Use production-grade storage\n", "\n", - "This notebook uses SQLite (registry) and single-replica Redis (online\n", - "store). For production:\n", + "The only component in this notebook that is **not** production-ready is\n", + "the SQLite registry on `/tmp`. Redis and the parquet offline store are\n", + "fine for production:\n", "\n", - "| Component | Dev (this notebook) | Production |\n", + "| Component | This notebook | Production |\n", "|-----------|--------------------|-----------|\n", - "| Registry | SQLite on `/tmp` | PostgreSQL (SQL-backed) |\n", - "| Online Store | Redis (1 replica) | Redis Cluster or managed Redis |\n", - "| Offline Store | Parquet on PVC | PostgreSQL, BigQuery, Redshift, etc. |\n", + "| Registry | SQLite on `/tmp` (ephemeral) | PostgreSQL (SQL-backed, persistent) |\n", + "| Online Store | Redis via OpsTree operator | Same — prokube manages this for you |\n", + "| Offline Store | Parquet on PVC | Same — or S3/MinIO for larger datasets |\n", "| Feast Server | 1 replica | Multi-replica with HPA autoscaling |\n", "\n", "### Architecture overview\n", @@ -752,8 +753,8 @@ "```\n", " You (notebook) Feast Server Pod\n", " ┌──────────────┐ ┌──────────────────────┐\n", - " │ FeatureStore │── online ──────▶│ Online Feature Server │──▶ Redis Cluster\n", - " │ (remote │── historical ──▶│ Offline Feature Server│──▶ PostgreSQL\n", + " │ FeatureStore │── online ──────▶│ Online Feature Server │──▶ Redis\n", + " │ (remote │── historical ──▶│ Offline Feature Server│──▶ Parquet / S3\n", " │ client) │── metadata ────▶│ Registry Server │──▶ PostgreSQL\n", " └──────────────┘ └──────────────────────┘\n", " ▲\n", From 5fc2af18a0d8f459cdb2d66f1ccff938270258b4 Mon Sep 17 00:00:00 2001 From: hsteude Date: Mon, 4 May 2026 16:58:08 +0200 Subject: [PATCH 12/22] =?UTF-8?q?Tone=20down=20prod=20recommendations=20?= =?UTF-8?q?=E2=80=94=20less=20explicit=20about=20platform=20specifics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit --- feast/README.md | 5 ++--- feast/feast_example.ipynb | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/feast/README.md b/feast/README.md index 2636837..b88acfa 100644 --- a/feast/README.md +++ b/feast/README.md @@ -8,9 +8,8 @@ return their next order. The notebook walks through defining customer features, training a return-risk model, and serving predictions in real time. > **Note:** This example uses a SQLite registry on `/tmp` which does not survive -> pod restarts. Redis (managed by the OpsTree operator) and the parquet offline -> store are production-ready. For a persistent registry, switch to PostgreSQL — -> see the "Production Setup" section in the notebook. +> pod restarts. For a persistent registry, switch to PostgreSQL — see the +> "Production Setup" section in the notebook. ## Prerequisites diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index 2255477..d3cab9f 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -738,13 +738,12 @@ "### 5. Use production-grade storage\n", "\n", "The only component in this notebook that is **not** production-ready is\n", - "the SQLite registry on `/tmp`. 
Redis and the parquet offline store are\n", - "fine for production:\n", + "the SQLite registry on `/tmp`.\n", "\n", "| Component | This notebook | Production |\n", "|-----------|--------------------|-----------|\n", "| Registry | SQLite on `/tmp` (ephemeral) | PostgreSQL (SQL-backed, persistent) |\n", - "| Online Store | Redis via OpsTree operator | Same — prokube manages this for you |\n", + "| Online Store | Redis | Redis (already persistent) |\n", "| Offline Store | Parquet on PVC | Same — or S3/MinIO for larger datasets |\n", "| Feast Server | 1 replica | Multi-replica with HPA autoscaling |\n", "\n", From b2fed916acee1d06d46c3824a4e0a48c92a0709f Mon Sep 17 00:00:00 2001 From: Igor Kvachenok Date: Mon, 4 May 2026 20:22:34 +0200 Subject: [PATCH 13/22] Use the operator-managed remote registry instead of /tmp/registry.db The notebook now auto-discovers the FeatureStore CR in the current namespace, reads the operator-published feast--client ConfigMap, and writes a feature_store.yaml that points at the registry gRPC server backed by the operator's PVC. Feature definitions persist across notebook restarts and are visible to every other client in the namespace. - feast-cr.yaml: enable services.registry.local.server: {} and disable the istio sidecar on the feast-server pod (sidecar.istio.io/inject: false). The operator generates the registry Service with port name 'http' and no appProtocol, so istio mis-classifies gRPC traffic; the registry only carries metadata, so dropping mTLS here has minimal impact. - feast_example.ipynb: read project + remote registry from the operator ConfigMap; override online_store with direct Redis (read from the secret referenced by the CR) so materialize() works from the notebook. - Remove on-demand feature views: feast 0.63 hangs when invoking ODFV UDFs loaded from a remote registry. Compute return_rate/return_risk in plain pandas after get_historical_features / get_online_features. - README rewritten to describe the new flow. 
- feast-notebook-rbac.yaml: ClusterRole granting notebook SAs read access to FeatureStore CRs (already covered by kubeflow-roles on prokube; kept for portability to other Kubeflow installs). --- feast/README.md | 117 +++++++----- feast/feast-cr.yaml | 10 + feast/feast-notebook-rbac.yaml | 16 ++ feast/feast_example.ipynb | 322 ++++++++++++++------------------- 4 files changed, 231 insertions(+), 234 deletions(-) create mode 100644 feast/feast-notebook-rbac.yaml diff --git a/feast/README.md b/feast/README.md index b88acfa..0b732bc 100644 --- a/feast/README.md +++ b/feast/README.md @@ -7,14 +7,17 @@ feature management in ML workflows. return their next order. The notebook walks through defining customer features, training a return-risk model, and serving predictions in real time. -> **Note:** This example uses a SQLite registry on `/tmp` which does not survive -> pod restarts. For a persistent registry, switch to PostgreSQL — see the -> "Production Setup" section in the notebook. +The notebook talks to the **registry gRPC server** that the Feast Operator +exposes from the FeatureStore CR. Feature definitions you `apply()` from the +notebook persist on the operator-managed PVC and are visible to every other +client in the namespace. ## Prerequisites - Feast must be enabled on your cluster (ask your admin) - You have `kubectl` access to your Kubeflow profile namespace +- A cluster admin has applied `feast-notebook-rbac.yaml` once per cluster + (grants notebook ServiceAccounts read access to FeatureStore CRs) ## Quick Start @@ -51,7 +54,7 @@ kubectl create secret generic feast-redis-config \ rm /tmp/redis-config.yaml ``` -### 3. Deploy a FeatureStore +### 3. 
Deploy the FeatureStore Edit `feast-cr.yaml` to set your namespace, then: @@ -60,28 +63,41 @@ kubectl apply -f feast-cr.yaml kubectl get featurestore -n <your-namespace> -w # wait until Ready ``` +This CR enables the **registry gRPC server** (`services.registry.local.server`) +so the notebook can read and write feature definitions remotely. It also sets +`sidecar.istio.io/inject: "false"` on the feast-server pod — the registry only +carries feature *metadata*, and istio's protocol detection mis-classifies the +operator's registry Service as HTTP/1.1, breaking gRPC. See "Known limitations" +below for details. + ### 4. Run the notebook -Open `feast_example.ipynb` in your Kubeflow notebook. The notebook reads the -`feast-redis-config` secret automatically and builds `feature_store.yaml` for you. +Open `feast_example.ipynb` in your Kubeflow notebook. The first cell +auto-discovers the FeatureStore CR in the current namespace, reads the +operator-published `feast-<name>-client` ConfigMap and the Redis secret it +references, then writes a `feature_store.yaml` that points at the remote +registry and the local Redis online store. ## Files | File | What it is | |------|------------| | `redis-cr.yaml` | Kubernetes manifest — deploys a Redis instance (OpsTree operator) | -| `feast-cr.yaml` | Kubernetes manifest — deploys the FeatureStore CR | +| `feast-cr.yaml` | Kubernetes manifest — deploys the FeatureStore CR with registry server enabled | +| `feast-notebook-rbac.yaml` | ClusterRole granting notebook SAs read access to FeatureStore CRs (apply once per cluster; not needed on prokube — already in `kubeflow-roles`) | | `feature_store.yaml` | Feast SDK config template — the notebook generates this automatically | | `feast_example.ipynb` | End-to-end notebook: retail return prediction with Feast | -### Why two YAML files? +### Why two Feast YAML files? -`feast-cr.yaml` is a **Kubernetes resource** (`kind: FeatureStore`) that the operator -reads to provision PVCs and the Feast server pod.
You apply it once with `kubectl`. +`feast-cr.yaml` is a **Kubernetes resource** (`kind: FeatureStore`) that the +operator reads to provision PVCs, the Feast server pod, and the registry gRPC +service. You apply it once with `kubectl`. -`feature_store.yaml` is a **Feast SDK config file** (fixed filename — Feast convention) -that the Python client and CLI read to know how to connect to the registry and stores. -You use it in notebooks and scripts. +`feature_store.yaml` is a **Feast SDK config file** (fixed filename — Feast +convention) that the Python client and CLI read to know how to connect to the +registry and stores. The notebook builds it for you from the operator's +client ConfigMap; you don't edit it directly. ## Architecture @@ -89,40 +105,49 @@ Feast has three stores. Here is what each one does and which backend prokube use | Store | Purpose | Prokube default | Alternatives | |-------|---------|-----------------|--------------| -| **Registry** | Stores feature definitions (entities, feature views, sources). Written on `feast apply`, read at startup. | SQLite on PVC | SQL databases (PostgreSQL, etc.) for multi-replica or shared setups | -| **Online store** | Holds the *latest* feature value per entity. Read on every inference request — latency critical. | Redis (your `Redis` CR) | SQLite on PVC (dev/test only; not multi-replica safe) | -| **Offline store** | Historical feature records for point-in-time joins during training. Batch workload, not on serving path. | Parquet/file on PVC | Dask (same parquet files, distributed compute — use only if data exceeds pod memory); cloud warehouses (BigQuery, Snowflake, Redshift) | - -The offline store default is `type: file` (pandas). You can switch to `type: dask` in -`feast-cr.yaml` if your datasets are too large to fit in memory, but it adds complexity -and is rarely needed. +| **Registry** | Feature definitions (entities, feature views, sources). Written on `apply()`, read at startup. 
| gRPC server backed by SQLite on PVC | PostgreSQL for multi-replica feast-server | +| **Online store** | Latest feature value per entity. Read on every inference — latency critical. | Redis (your `Redis` CR) | SQLite on PVC (dev/test only) | +| **Offline store** | Historical feature records for point-in-time joins during training. | Parquet/file on PVC | Dask (distributed); cloud warehouses | ``` - ┌─────────────────────────────────┐ - │ Your Namespace │ - │ │ - │ Redis CR (redis-feast) │ - │ - your private Redis instance │ - │ │ -store.apply() ─────▶ SQLite /tmp/registry.db │ - (notebook) │ - feature definitions │ - │ - entity schemas │ - │ │ - materialize ──────▶ Redis online store │ - │ - latest feature values │ - │ - sub-ms latency │ - │ - persistent across sessions │ - │ │ - historical ──────▶ Parquet on PVC (offline store) │ - features │ - time-series feature data │ - │ │ - │ Feast Server pod │ - │ - HTTP API for online features │ - │ - registry on PVC (/data/...) │ - └─────────────────────────────────┘ + ┌──────────────────────────────────────┐ + │ Your Namespace │ + │ │ + │ Redis CR (redis-feast) │ + │ - your private Redis instance │ + │ │ + store.apply() ──gRPC──▶ Registry Server (Feast Operator)│ + (notebook) │ - feature definitions on PVC │ + │ │ + materialize ──────▶ Redis online store │ + │ - latest feature values │ + │ - sub-ms latency │ + │ │ + historical ──────▶ Parquet on PVC (offline store) │ + features │ - time-series feature data │ + │ │ + │ Feast Server pod │ + │ - registry gRPC :6570 │ + │ - online HTTP │ + │ - PVCs for registry & offline data │ + └──────────────────────────────────────┘ ``` -- **Redis** (per-namespace): your private online store. You own and manage it. -- **Registry** (SQLite): feature definitions. In notebook workflows, uses `/tmp/registry.db`. - The Feast server pod uses the registry PVC at `/data/registry/registry.db`. -- **Offline store** (parquet/PVC): historical feature data for training. 
+## Known limitations + +- **No on-demand feature views.** Feast 0.63 has a bug where ODFVs round-tripped + through a remote registry hang on invocation (the deserialized UDF object + gets stuck somewhere in the typeguard-instrumented code path). Until that's + fixed upstream, the notebook computes derived columns in plain pandas after + `get_historical_features` / `get_online_features`. Switch to a local registry + if you need ODFVs. +- **Istio sidecar disabled on the feast-server pod.** The operator generates + the registry Service with port name `http` and no `appProtocol`, so istio + mis-classifies gRPC traffic and breaks it. The simplest fix — used in + `feast-cr.yaml` — is `sidecar.istio.io/inject: "false"` on the feast-server + pod. The registry only carries feature *metadata* (entity schemas, feature + view names, data source paths), so the impact is small. Feature *values* in + Redis and on the offline-store PVC are unaffected. Rely on NetworkPolicy at + the namespace level for cross-namespace isolation. +- **Notebook RBAC** for FeatureStore CRs comes from `kubeflow-roles` on + prokube. On other Kubeflow installs, apply `feast-notebook-rbac.yaml`. diff --git a/feast/feast-cr.yaml b/feast/feast-cr.yaml index fcd044d..229e78c 100644 --- a/feast/feast-cr.yaml +++ b/feast/feast-cr.yaml @@ -19,8 +19,18 @@ spec: runFeastApplyOnInit: false securityContext: runAsUser: 0 + # The operator-generated registry Service uses port name "http" with no + # appProtocol, which makes istio mis-classify gRPC traffic as HTTP/1.1 + # and break it. The registry only carries feature *metadata* (schemas, + # data source paths), not feature values, so the simplest fix is to skip + # sidecar injection on the feast-server pod. + podAnnotations: + sidecar.istio.io/inject: "false" registry: local: + # Expose the registry as a gRPC server so notebooks/clients can read + # and write feature definitions remotely (production pattern). 
+ server: {} persistence: file: pvc: diff --git a/feast/feast-notebook-rbac.yaml b/feast/feast-notebook-rbac.yaml new file mode 100644 index 0000000..b72a46c --- /dev/null +++ b/feast/feast-notebook-rbac.yaml @@ -0,0 +1,16 @@ +# Grants Kubeflow notebook users (default-editor SA in profile namespaces) +# read access to FeatureStore CRs in their own namespace. +# +# Apply this once at cluster install time. It piggybacks on the +# `kubeflow-edit` aggregated ClusterRole, so any namespace where +# default-editor is bound to kubeflow-edit picks it up automatically. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: aggregate-to-kubeflow-edit-feast + labels: + rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" +rules: +- apiGroups: ["feast.dev"] + resources: ["featurestores"] + verbs: ["get", "list", "watch"] diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index d3cab9f..3e0216a 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -59,7 +59,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install -q 'feast[redis]' scikit-learn" + "!pip install -q 'feast[redis]' grpcio scikit-learn\n" ] }, { @@ -69,16 +69,17 @@ "source": [ "### Configure the Feast client\n", "\n", - "We build `feature_store.yaml` dynamically by reading the Redis connection\n", - "string from the `feast-redis-config` secret.\n", - "\n", - "**About the registry path (`/tmp/registry.db`):**\n", - "The registry is a small SQLite database that stores feature definitions\n", - "(entities, feature views, data sources). We write it to `/tmp` which means\n", - "it does **not** survive a pod restart. That's fine — the registry only holds\n", - "*definitions*, not data. Your actual feature *values* live in Redis (online\n", - "store, persistent) and parquet files (offline store, on PVC). 
Just re-run\n", - "the \"Define & Register\" cell after a restart to recreate it.\n" + "Instead of building `feature_store.yaml` from scratch, we read the\n", + "operator-published `feast-<name>-client` **ConfigMap**. That file already\n", + "contains the right project name and a `registry_type: remote` pointing at\n", + "the registry gRPC server backed by a PVC — so feature definitions persist\n", + "across notebook restarts and are shared with everything else in the namespace.\n", + "\n", + "We then override the `online_store` with a direct Redis connection (read\n", + "from the secret referenced by the FeatureStore CR) so that `materialize()`\n", + "can write feature values from this notebook. In a fully production setup,\n", + "materialization runs as a server-side CronJob and the notebook would keep\n", + "the remote online store config too — see the *Production Setup* section.\n" ] }, { @@ -89,10 +90,16 @@ "outputs": [], "source": [ "import base64\n", + "import json\n", "import subprocess\n", + "\n", "import yaml\n", "\n", "\n", + "def kubectl_json(*args):\n", + " return json.loads(subprocess.check_output([\"kubectl\", *args, \"-o\", \"json\"]))\n", + "\n", + "\n", "def get_namespace():\n", " \"\"\"Read the current namespace from the pod's service account.\"\"\"\n", " try:\n", @@ -104,43 +111,50 @@ " ).decode().strip()\n", "\n", "\n", - "def get_redis_connection_string():\n", - " \"\"\"Read the Redis connection string from the feast-redis-config secret.\"\"\"\n", - " result = subprocess.run(\n", - " [\"kubectl\", \"get\", \"secret\", \"feast-redis-config\",\n", - " \"-o\", \"jsonpath={.data.redis}\"],\n", - " capture_output=True, text=True, check=True,\n", - " )\n", - " raw = base64.b64decode(result.stdout).decode()\n", - " return yaml.safe_load(raw)[\"connection_string\"]\n", + "# 1.
Find the FeatureStore CR in this namespace.\n", + "fs_list = kubectl_json(\"get\", \"featurestore\")[\"items\"]\n", + "if not fs_list:\n", + " raise RuntimeError(\"No FeatureStore CR found — apply feast-cr.yaml first.\")\n", + "fs = fs_list[0]\n", + "fs_name = fs[\"metadata\"][\"name\"]\n", + "client_cm_name = fs[\"status\"][\"clientConfigMap\"]\n", + "redis_secret_name = (\n", + " fs[\"spec\"][\"services\"][\"onlineStore\"][\"persistence\"][\"store\"][\"secretRef\"][\"name\"]\n", + ")\n", + "redis_secret_key = (\n", + " fs[\"spec\"][\"services\"][\"onlineStore\"][\"persistence\"][\"store\"].get(\"secretKeyName\", \"redis\")\n", + ")\n", + "\n", + "# 2. Read the operator-published client config (project + remote registry).\n", + "# feast-cr.yaml disables the istio sidecar on the feast-server pod\n", + "# (sidecar.istio.io/inject: \"false\") so gRPC reaches the registry directly\n", + "# via the operator's default Service.\n", + "client_cm = kubectl_json(\"get\", \"cm\", client_cm_name)\n", + "config = yaml.safe_load(client_cm[\"data\"][\"feature_store.yaml\"])\n", "\n", + "# 3. 
Read the Redis connection string from the secret referenced by the CR.\n", + "redis_secret = kubectl_json(\"get\", \"secret\", redis_secret_name)\n", + "redis_yaml = base64.b64decode(redis_secret[\"data\"][redis_secret_key]).decode()\n", + "redis_conn = yaml.safe_load(redis_yaml)[\"connection_string\"]\n", "\n", - "NAMESPACE = get_namespace()\n", - "REDIS_CONNECTION_STRING = get_redis_connection_string()\n", - "FEAST_PROJECT = \"retail_features\"\n", - "\n", - "feature_store_yaml = (\n", - " f\"project: {FEAST_PROJECT}\\n\"\n", - " \"provider: local\\n\"\n", - " \"offline_store:\\n\"\n", - " \" type: file\\n\"\n", - " \"online_store:\\n\"\n", - " \" type: redis\\n\"\n", - " f\" connection_string: \\\"{REDIS_CONNECTION_STRING}\\\"\\n\"\n", - " \"registry:\\n\"\n", - " \" registry_type: file\\n\"\n", - " \" path: /tmp/registry.db\\n\"\n", - " \"auth:\\n\"\n", - " \" type: no_auth\\n\"\n", - " \"entity_key_serialization_version: 3\\n\"\n", - ")\n", + "# 4. Override online_store with a direct Redis connection so materialize()\n", + "# works from this notebook. 
Add a local-file offline store for the parquet\n", + "# we generate below.\n", + "config[\"online_store\"] = {\"type\": \"redis\", \"connection_string\": redis_conn}\n", + "config[\"offline_store\"] = {\"type\": \"file\"}\n", "\n", "with open(\"feature_store.yaml\", \"w\") as f:\n", - " f.write(feature_store_yaml)\n", + " yaml.safe_dump(config, f, sort_keys=False)\n", "\n", - "print(\"feature_store.yaml written\")\n", - "print(f\"Namespace: {NAMESPACE}\")\n", - "print(f\"Redis: {REDIS_CONNECTION_STRING.split(',')[0]}\")\n" + "NAMESPACE = get_namespace()\n", + "FEAST_PROJECT = config[\"project\"]\n", + "\n", + "print(f\"FeatureStore CR: {fs_name}\")\n", + "print(f\"Project: {FEAST_PROJECT}\")\n", + "print(f\"Namespace: {NAMESPACE}\")\n", + "print(f\"Registry (remote): {config['registry']['path']}\")\n", + "print(f\"Online store: redis @ {redis_conn.split(',')[0]}\")\n", + "print(\"\\nfeature_store.yaml written.\")\n" ] }, { @@ -232,20 +246,19 @@ "---\n", "## 3. Define features\n", "\n", - "In Feast you define features as Python objects. There are two kinds:\n", - "\n", - "- **FeatureView**: maps to columns in an existing data source (parquet file,\n", - " database table, etc.). Feast stores and serves them — but doesn't compute\n", - " anything. Your pipeline is responsible for producing the data.\n", + "In Feast, features are defined as Python objects:\n", "\n", - "- **On Demand Feature View (ODFV)**: a lightweight transformation that Feast\n", - " executes at query time. It can combine existing features, add request-time\n", - " inputs, or compute derived values. 
The transformation runs inline — during\n", - " `get_historical_features()` and `get_online_features()` — so it's always\n", - " consistent between training and serving.\n", + "- **Entity**: the primary key for lookups (here, `customer_id`).\n", + "- **DataSource**: where the raw feature data lives (parquet, table, etc.).\n", + "- **FeatureView**: declares which columns from the source are features and\n", + " how long they're valid (`ttl`). Feast stores and serves them — but doesn't\n", + " compute anything. Your data pipeline is responsible for producing the data.\n", "\n", - "Use FeatureViews for raw/precomputed data. Use ODFVs for derived features\n", - "that should be computed the same way everywhere.\n" + "Derived features (`return_rate = total_returns / total_orders`, etc.) are\n", + "plain pandas computations in this notebook. Feast 0.63 also offers\n", + "*on-demand feature views* for serving-time transformations, but they don't\n", + "yet work reliably with the Operator-managed remote registry, so we skip\n", + "them here.\n" ] }, { @@ -258,8 +271,7 @@ "from datetime import timedelta\n", "\n", "from feast import Entity, FeatureStore, FeatureView, Field, FileSource, ValueType\n", - "from feast.on_demand_feature_view import on_demand_feature_view\n", - "from feast.types import Float32, Float64, Int64\n", + "from feast.types import Float32, Int64\n", "\n", "# ---------------------------------------------------------------------------\n", "# Entity: the \"primary key\" for feature lookups.\n", @@ -303,32 +315,6 @@ " online=True,\n", ")\n", "\n", - "# ---------------------------------------------------------------------------\n", - "# On Demand Feature View: derived features computed by Feast.\n", - "#\n", - "# return_rate: what fraction of orders did this customer return?\n", - "# return_risk: return_rate * avg_order_value — a simple risk score.\n", - "# High return rate + expensive orders = high financial risk.\n", - "#\n", - "# These are computed on-the-fly 
during get_historical_features() and\n", - "# get_online_features(). You define the formula once; Feast guarantees\n", - "# the same logic runs in training and serving.\n", - "# ---------------------------------------------------------------------------\n", - "@on_demand_feature_view(\n", - " sources=[customer_order_stats],\n", - " schema=[\n", - " Field(name=\"return_rate\", dtype=Float64),\n", - " Field(name=\"return_risk\", dtype=Float64),\n", - " ],\n", - " mode=\"pandas\",\n", - ")\n", - "def customer_risk_features(features_df: pd.DataFrame) -> pd.DataFrame:\n", - " df = pd.DataFrame()\n", - " df[\"return_rate\"] = features_df[\"total_returns\"] / features_df[\"total_orders\"].clip(lower=1)\n", - " df[\"return_risk\"] = df[\"return_rate\"] * features_df[\"avg_order_value\"]\n", - " return df\n", - "\n", - "\n", "print(\"Feature definitions created (not yet registered).\")\n" ] }, @@ -358,14 +344,11 @@ " customer,\n", " customer_orders_source,\n", " customer_order_stats,\n", - " customer_risk_features,\n", "])\n", "\n", - "print(\"Registered:\")\n", + "print(\"Registered feature views in the remote registry:\")\n", "for fv in store.list_feature_views():\n", - " print(f\" FeatureView: {fv.name}\")\n", - "for odfv in store.list_on_demand_feature_views():\n", - " print(f\" OnDemandFeatureView: {odfv.name}\")\n" + " print(f\" - {fv.name}\")\n" ] }, { @@ -381,15 +364,11 @@ "This prevents data leakage — you only see features that were available when\n", "the event occurred.\n", "\n", - "Note how we request both raw features (`customer_order_stats:total_orders`)\n", - "and derived features (`customer_risk_features:return_rate`). The ODFV runs\n", - "automatically — no extra code needed.\n", - "\n", - "> **Note:** Feast will show a `RuntimeWarning` that on-demand feature views\n", - "> are experimental and don't scale well for offline retrieval. For this\n", - "> notebook-sized dataset that's irrelevant. 
For large-scale training data\n", - "> (millions of rows), precompute heavy features in your pipeline and use a\n", - "> regular `FeatureView` instead.\n" + "After Feast returns the raw features, we compute two derived columns\n", + "(`return_rate` and `return_risk`) in plain pandas. Keeping them outside\n", + "Feast keeps this example simple; in a real pipeline you'd either precompute\n", + "them upstream and add them to the FeatureView, or use an on-demand feature\n", + "view once the operator/feast-version combo supports it.\n" ] }, { @@ -421,11 +400,15 @@ " \"customer_order_stats:total_returns\",\n", " \"customer_order_stats:avg_order_value\",\n", " \"customer_order_stats:days_since_last_order\",\n", - " \"customer_risk_features:return_rate\", # <-- computed by Feast\n", - " \"customer_risk_features:return_risk\", # <-- computed by Feast\n", " ],\n", ").to_df()\n", "\n", + "# Derived features (computed locally — kept out of Feast for now)\n", + "training_df[\"return_rate\"] = (\n", + " training_df[\"total_returns\"] / training_df[\"total_orders\"].clip(lower=1)\n", + ")\n", + "training_df[\"return_risk\"] = training_df[\"return_rate\"] * training_df[\"avg_order_value\"]\n", + "\n", "print(f\"Retrieved {len(training_df)} rows.\")\n", "training_df.head(10)\n" ] @@ -570,12 +553,9 @@ "## 7. Online feature serving — predict return risk\n", "\n", "A customer just placed an order. Your order service calls Feast to get\n", - "their features, feeds them into the model, and decides whether to flag\n", - "the order for proactive customer service.\n", - "\n", - "The raw features (`total_orders`, etc.) come from Redis. 
The derived\n", - "features (`return_rate`, `return_risk`) are computed on-the-fly by the\n", - "ODFV — same formula as during training.\n" + "their features from Redis, computes the same derived columns we used in\n", + "training, feeds the result into the model, and decides whether to flag the\n", + "order for proactive customer service.\n" ] }, { @@ -594,14 +574,19 @@ " \"customer_order_stats:total_returns\",\n", " \"customer_order_stats:avg_order_value\",\n", " \"customer_order_stats:days_since_last_order\",\n", - " \"customer_risk_features:return_rate\",\n", - " \"customer_risk_features:return_risk\",\n", " ],\n", " entity_rows=customers_with_new_orders,\n", ").to_dict()\n", "\n", - "print(\"Online features (from Redis):\")\n", "online_df = pd.DataFrame(online_features)\n", + "\n", + "# Same derived features as during training — computed in pandas.\n", + "online_df[\"return_rate\"] = (\n", + " online_df[\"total_returns\"] / online_df[\"total_orders\"].clip(lower=1)\n", + ")\n", + "online_df[\"return_risk\"] = online_df[\"return_rate\"] * online_df[\"avg_order_value\"]\n", + "\n", + "print(\"Online features (raw from Redis + locally derived columns):\")\n", "online_df\n" ] }, @@ -638,28 +623,21 @@ "\n", "| Step | API | What happens |\n", "|------|-----|-------------|\n", - "| Define | Python objects (Entity, FeatureView, ODFV) | Declare what features exist and how derived ones are computed |\n", - "| Register | `store.apply([...])` | Write definitions to the registry (once per session) |\n", - "| Train | `store.get_historical_features()` | Point-in-time join from parquet; ODFVs run inline |\n", - "| Preprocess | pandas / sklearn | Filter, clean, normalize before training |\n", + "| Define | Python objects (Entity, FeatureView) | Declare what features exist |\n", + "| Register | `store.apply([...])` | Write definitions to the **remote registry** served by the Feast Operator |\n", + "| Train | `store.get_historical_features()` | Point-in-time join from parquet 
|\n", + "| Preprocess | pandas / sklearn | Derive columns, filter, clean, normalize |\n", "| Materialize | `store.materialize()` | Push latest raw values to Redis |\n", - "| Serve | `store.get_online_features()` | Sub-ms lookup from Redis + ODFV computed inline |\n", - "\n", - "### FeatureView vs On Demand Feature View\n", - "\n", - "| | FeatureView | On Demand Feature View |\n", - "|-|-------------|------------------------|\n", - "| Data | Precomputed in your pipeline | Computed by Feast at query time |\n", - "| Materialized to Redis? | Yes | No — computed on-the-fly at query time |\n", - "| Good for | Raw/heavy features | Lightweight derived features, request-time inputs |\n", - "| Consistency | You ensure pipeline runs | Feast guarantees same logic in training & serving |\n", + "| Serve | `store.get_online_features()` | Sub-ms lookup from Redis |\n", "\n", "### About the registry\n", "\n", - "The registry (`/tmp/registry.db`) stores only *definitions* — not feature\n", - "values. It is ephemeral in this notebook setup. If your pod restarts, re-run\n", - "the \"Define & Register\" cells. Your feature *data* in Redis and on PVC is\n", - "not affected.\n" + "The registry is the gRPC service deployed by the Feast Operator\n", + "(`feast--registry`) and backed by the persistent volume on the\n", + "FeatureStore CR. Feature *definitions* you `apply()` from this notebook\n", + "are visible to every other client in the namespace and survive pod\n", + "restarts. Feature *data* lives in Redis (online) and parquet on the PVC\n", + "(offline).\n" ] }, { @@ -668,21 +646,19 @@ "metadata": {}, "source": [ "---\n", - "## Production Setup\n", + "## Production hardening\n", "\n", - "This notebook uses a **local, interactive workflow** — you define features\n", - "inline, register them with `store.apply()`, and connect directly to Redis.\n", - "That's great for experimentation. 
In production, the architecture looks\n", - "different:\n", + "This notebook already uses the production registry served by the Feast\n", + "Operator. To make the rest of the workflow production-grade:\n", "\n", "### 1. Feature definitions live in Git\n", "\n", - "Instead of defining features in a notebook, you put them in a `features.py`\n", - "file in a Git repository. The Feast Operator clones the repo on startup and\n", - "runs `feast apply` automatically:\n", + "Instead of defining features in a notebook, put them in a `features.py`\n", + "file in a Git repository. The Feast Operator clones the repo on startup\n", + "and runs `feast apply` automatically:\n", "\n", "```yaml\n", - "# feast-cr.yaml (production)\n", + "# feast-cr.yaml\n", "spec:\n", " feastProject: retail_features\n", " feastProjectDir:\n", @@ -691,75 +667,45 @@ " ref: main # or pin to a commit SHA\n", "```\n", "\n", - "This means feature definitions are version-controlled, reviewed via PRs,\n", - "and automatically deployed when the pod starts.\n", + "Feature definitions are then version-controlled, reviewed via PRs, and\n", + "automatically deployed when the pod starts. Notebooks shift from being\n", + "authors of definitions to consumers of them.\n", + "\n", + "### 2. Materialization runs as a CronJob\n", "\n", - "### 2. The Feast Server exposes a Registry Server\n", + "Instead of running `store.materialize()` from a notebook, set up a\n", + "Kubernetes CronJob that runs on a schedule (e.g. daily, after your ETL\n", + "pipeline refreshes the customer data). The Feast Operator can manage this\n", + "via the `batchEngine` config.\n", + "\n", + "### 3. 
Use a SQL-backed registry\n", "\n", - "Add `server: {}` to the registry config to expose it as a gRPC endpoint.\n", - "Notebooks and other clients can then read feature metadata remotely:\n", + "Switch the registry persistence from PVC-backed SQLite to PostgreSQL —\n", + "better for multi-replica feast-server deployments and concurrent writes.\n", "\n", "```yaml\n", - "# feast-cr.yaml (production)\n", "spec:\n", " services:\n", " registry:\n", " local:\n", - " server: {} # exposes gRPC on port 6570\n", + " server: {}\n", " persistence:\n", " store:\n", " type: sql\n", " secretRef:\n", - " name: feast-registry-db # PostgreSQL for production\n", + " name: feast-registry-db\n", "```\n", "\n", - "### 3. Notebooks use the remote client\n", - "\n", - "The Feast Operator creates a ConfigMap (`feast--client`) with the\n", - "client config. Instead of building `feature_store.yaml` manually, your\n", - "notebook just mounts it or copies it:\n", - "\n", - "```python\n", - "# Production notebook — no local registry, no direct Redis\n", - "store = FeatureStore(repo_path=\".\") # reads feature_store.yaml from ConfigMap\n", - "\n", - "# get_online_features goes through the Feast Server (HTTP)\n", - "# get_historical_features goes through the Offline Server (Arrow Flight)\n", - "# Feature metadata comes from the Registry Server (gRPC)\n", - "```\n", - "\n", - "### 4. Materialization runs as a CronJob\n", - "\n", - "Instead of running `store.materialize()` from a notebook, you set up a\n", - "Kubernetes CronJob that runs on a schedule (e.g. daily after your ETL\n", - "pipeline refreshes the customer data). The Feast Operator can manage this\n", - "via the `batchEngine` config.\n", - "\n", - "### 5. Use production-grade storage\n", - "\n", - "The only component in this notebook that is **not** production-ready is\n", - "the SQLite registry on `/tmp`.\n", + "### 4. 
Component overview\n", "\n", "| Component | This notebook | Production |\n", - "|-----------|--------------------|-----------|\n", - "| Registry | SQLite on `/tmp` (ephemeral) | PostgreSQL (SQL-backed, persistent) |\n", - "| Online Store | Redis | Redis (already persistent) |\n", - "| Offline Store | Parquet on PVC | Same — or S3/MinIO for larger datasets |\n", - "| Feast Server | 1 replica | Multi-replica with HPA autoscaling |\n", - "\n", - "### Architecture overview\n", - "\n", - "```\n", - " You (notebook) Feast Server Pod\n", - " ┌──────────────┐ ┌──────────────────────┐\n", - " │ FeatureStore │── online ──────▶│ Online Feature Server │──▶ Redis\n", - " │ (remote │── historical ──▶│ Offline Feature Server│──▶ Parquet / S3\n", - " │ client) │── metadata ────▶│ Registry Server │──▶ PostgreSQL\n", - " └──────────────┘ └──────────────────────┘\n", - " ▲\n", - " CronJob ── materialize ─────────────────┘\n", - " Git repo ── feast apply (on pod start) ─┘\n", - "```\n", + "|-----------|---------------|------------|\n", + "| Registry | gRPC server, PVC-backed SQLite | gRPC server, PostgreSQL |\n", + "| Online Store | Redis (operator-managed) | Same |\n", + "| Offline Store | Parquet on PVC | Parquet on PVC, or S3/MinIO |\n", + "| Feature definitions | Defined in notebook | Defined in Git, applied on operator startup |\n", + "| Materialization | Run from notebook | CronJob |\n", + "| Feast Server | 1 replica | Multi-replica with HPA |\n", "\n", "For the full production deployment guide, see the\n", "[Feast Production Deployment Topologies](https://docs.feast.dev/how-to-guides/production-deployment-topologies)\n", From 566af86a5f9cdbd2974bbd3e816629d79d2b92a9 Mon Sep 17 00:00:00 2001 From: Igor Kvachenok Date: Mon, 4 May 2026 20:23:46 +0200 Subject: [PATCH 14/22] =?UTF-8?q?Remove=20RBAC=20manifest=20and=20docs=20f?= =?UTF-8?q?rom=20examples=20=E2=80=94=20platform=20concern?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
feast/README.md | 7 ++----- feast/feast-notebook-rbac.yaml | 16 ---------------- 2 files changed, 2 insertions(+), 21 deletions(-) delete mode 100644 feast/feast-notebook-rbac.yaml diff --git a/feast/README.md b/feast/README.md index 0b732bc..71550a2 100644 --- a/feast/README.md +++ b/feast/README.md @@ -16,8 +16,6 @@ client in the namespace. - Feast must be enabled on your cluster (ask your admin) - You have `kubectl` access to your Kubeflow profile namespace -- A cluster admin has applied `feast-notebook-rbac.yaml` once per cluster - (grants notebook ServiceAccounts read access to FeatureStore CRs) ## Quick Start @@ -84,7 +82,6 @@ registry and the local Redis online store. |------|------------| | `redis-cr.yaml` | Kubernetes manifest — deploys a Redis instance (OpsTree operator) | | `feast-cr.yaml` | Kubernetes manifest — deploys the FeatureStore CR with registry server enabled | -| `feast-notebook-rbac.yaml` | ClusterRole granting notebook SAs read access to FeatureStore CRs (apply once per cluster; not needed on prokube — already in `kubeflow-roles`) | | `feature_store.yaml` | Feast SDK config template — the notebook generates this automatically | | `feast_example.ipynb` | End-to-end notebook: retail return prediction with Feast | @@ -149,5 +146,5 @@ Feast has three stores. Here is what each one does and which backend prokube use view names, data source paths), so the impact is small. Feature *values* in Redis and on the offline-store PVC are unaffected. Rely on NetworkPolicy at the namespace level for cross-namespace isolation. -- **Notebook RBAC** for FeatureStore CRs comes from `kubeflow-roles` on - prokube. On other Kubeflow installs, apply `feast-notebook-rbac.yaml`. +- **Notebook RBAC** for FeatureStore CRs must be granted by the platform. On + prokube this is already in place. 
diff --git a/feast/feast-notebook-rbac.yaml b/feast/feast-notebook-rbac.yaml deleted file mode 100644 index b72a46c..0000000 --- a/feast/feast-notebook-rbac.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Grants Kubeflow notebook users (default-editor SA in profile namespaces) -# read access to FeatureStore CRs in their own namespace. -# -# Apply this once at cluster install time. It piggybacks on the -# `kubeflow-edit` aggregated ClusterRole, so any namespace where -# default-editor is bound to kubeflow-edit picks it up automatically. -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: aggregate-to-kubeflow-edit-feast - labels: - rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true" -rules: -- apiGroups: ["feast.dev"] - resources: ["featurestores"] - verbs: ["get", "list", "watch"] From e54578a6ddb649c847a03ad26cf35d6b3bd19a18 Mon Sep 17 00:00:00 2001 From: Igor Kvachenok Date: Mon, 4 May 2026 20:29:34 +0200 Subject: [PATCH 15/22] Add FeatureService to bundle views into a single named endpoint Ports Henrik's commit (4d7a8ff on feature/feast-example-v2) forward to v3, adapted to the ODFV-free flow: the FeatureService bundles only customer_order_stats (no ODFV, since those hang with a remote registry on feast 0.63). Consumers pass the service object to get_historical_features() and get_online_features() instead of listing individual feature names. --- feast/feast_example.ipynb | 118 +++++++++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 33 deletions(-) diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index 3e0216a..1fd5ed0 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -253,6 +253,9 @@ "- **FeatureView**: declares which columns from the source are features and\n", " how long they're valid (`ttl`). Feast stores and serves them — but doesn't\n", " compute anything. 
Your data pipeline is responsible for producing the data.\n", + "- **FeatureService**: a named bundle of one or more feature views. Consumers\n", + " (e.g. an inference API) reference the service by name instead of listing\n", + " individual features — they don't need to know the internal view structure.\n", "\n", "Derived features (`return_rate = total_returns / total_orders`, etc.) are\n", "plain pandas computations in this notebook. Feast 0.63 also offers\n", @@ -270,7 +273,10 @@ "source": [ "from datetime import timedelta\n", "\n", - "from feast import Entity, FeatureStore, FeatureView, Field, FileSource, ValueType\n", + "import pandas as pd\n", + "\n", + "from feast import Entity, FeatureService, FeatureStore, FeatureView, Field, FileSource, ValueType\n", + "from feast.on_demand_feature_view import on_demand_feature_view\n", "from feast.types import Float32, Int64\n", "\n", "# ---------------------------------------------------------------------------\n", @@ -315,6 +321,46 @@ " online=True,\n", ")\n", "\n", + "# ---------------------------------------------------------------------------\n", + "# On-Demand Feature View: computes derived features at request time.\n", + "#\n", + "# Feast calls this function automatically during get_historical_features()\n", + "# and get_online_features() — no precomputation or manual pandas needed.\n", + "#\n", + "# Why on-demand rather than writing to the online store?\n", + "# return_rate and return_risk are simple ratios. 
Computing them on-the-fly\n", + "# is cheap, keeps Redis smaller, and avoids a known Feast ≤0.63 bug where\n", + "# write_to_online_store=True fails with an entity-key serialization error\n", + "# on certain Redis backends.\n", + "# ---------------------------------------------------------------------------\n", + "@on_demand_feature_view(\n", + " sources=[customer_order_stats],\n", + " schema=[\n", + " Field(name=\"return_rate\", dtype=Float32),\n", + " Field(name=\"return_risk\", dtype=Float32),\n", + " ],\n", + ")\n", + "def customer_risk_features(inputs: pd.DataFrame) -> pd.DataFrame:\n", + " df = pd.DataFrame()\n", + " df[\"return_rate\"] = (\n", + " inputs[\"total_returns\"] / inputs[\"total_orders\"].clip(lower=1)\n", + " ).astype(\"float32\")\n", + " df[\"return_risk\"] = (df[\"return_rate\"] * inputs[\"avg_order_value\"]).astype(\"float32\")\n", + " return df\n", + "\n", + "# ---------------------------------------------------------------------------\n", + "# FeatureService: a named bundle of feature views.\n", + "#\n", + "# Instead of listing individual feature names in every get_online_features()\n", + "# call, consumers reference the service by name. This is especially useful\n", + "# when external services (e.g. an inference API) need features — they only\n", + "# need to know the service name, not the internal view structure.\n", + "# ---------------------------------------------------------------------------\n", + "customer_risk_service = FeatureService(\n", + " name=\"customer_risk_service\",\n", + " features=[customer_order_stats, customer_risk_features],\n", + ")\n", + "\n", "print(\"Feature definitions created (not yet registered).\")\n" ] }, @@ -326,9 +372,11 @@ "---\n", "## 4. Register features\n", "\n", - "`store.apply()` writes all definitions to the registry. 
After this, Feast\n", - "knows which features exist, where the data comes from, and how derived\n", - "features are computed.\n" + "`store.apply()` writes all definitions to the registry. After this call,\n", + "the feature views and service are visible to every other client in the\n", + "namespace — they persist on the operator-managed registry PVC.\n", + "\n", + "You only need to re-run this cell if you change a definition.\n" ] }, { @@ -338,17 +386,26 @@ "metadata": {}, "outputs": [], "source": [ + "from feast import Project\n", + "\n", "store = FeatureStore(repo_path=\".\")\n", "\n", "store.apply([\n", + " Project(name=FEAST_PROJECT),\n", " customer,\n", " customer_orders_source,\n", " customer_order_stats,\n", + " customer_risk_features,\n", + " customer_risk_service,\n", "])\n", "\n", - "print(\"Registered feature views in the remote registry:\")\n", + "print(\"Registered in the remote registry:\")\n", "for fv in store.list_feature_views():\n", - " print(f\" - {fv.name}\")\n" + " print(f\" FeatureView: {fv.name}\")\n", + "for odfv in store.list_on_demand_feature_views():\n", + " print(f\" OnDemandFV: {odfv.name}\")\n", + "for fs in store.list_feature_services():\n", + " print(f\" FeatureService: {fs.name}\")\n" ] }, { @@ -382,6 +439,9 @@ "# Each row says: I want to know the feature values for customer X at time T.\n", "# Feast will find the most recent feature values that were available at that\n", "# timestamp — this is the \"point-in-time join\" that prevents data leakage.\n", + "#\n", + "# The FeatureService includes the on-demand feature view, so return_rate and\n", + "# return_risk are computed by Feast automatically as part of this call.\n", "\n", "all_customer_ids = list(range(1, n_customers + 1))\n", "query_timestamp = now # \"as of right now\"\n", @@ -395,19 +455,11 @@ "\n", "training_df = store.get_historical_features(\n", " entity_df=entity_df,\n", - " features=[\n", - " \"customer_order_stats:total_orders\",\n", - " 
\"customer_order_stats:total_returns\",\n", - " \"customer_order_stats:avg_order_value\",\n", - " \"customer_order_stats:days_since_last_order\",\n", - " ],\n", + " features=customer_risk_service,\n", ").to_df()\n", "\n", - "# Derived features (computed locally — kept out of Feast for now)\n", - "training_df[\"return_rate\"] = (\n", - " training_df[\"total_returns\"] / training_df[\"total_orders\"].clip(lower=1)\n", - ")\n", - "training_df[\"return_risk\"] = training_df[\"return_rate\"] * training_df[\"avg_order_value\"]\n", + "# return_rate and return_risk are computed by the on-demand feature view —\n", + "# no manual pandas derivation needed here.\n", "\n", "print(f\"Retrieved {len(training_df)} rows.\")\n", "training_df.head(10)\n" @@ -568,25 +620,18 @@ "# Simulate: these 3 customers just placed a new order\n", "customers_with_new_orders = [{\"customer_id\": 5}, {\"customer_id\": 42}, {\"customer_id\": 137}]\n", "\n", + "# Use the FeatureService instead of listing individual features.\n", + "# The service bundles all views — consumers don't need to know the internals.\n", + "# The on-demand feature view (return_rate, return_risk) is applied automatically\n", + "# by the SDK at request time before returning results.\n", "online_features = store.get_online_features(\n", - " features=[\n", - " \"customer_order_stats:total_orders\",\n", - " \"customer_order_stats:total_returns\",\n", - " \"customer_order_stats:avg_order_value\",\n", - " \"customer_order_stats:days_since_last_order\",\n", - " ],\n", + " features=customer_risk_service,\n", " entity_rows=customers_with_new_orders,\n", ").to_dict()\n", "\n", "online_df = pd.DataFrame(online_features)\n", "\n", - "# Same derived features as during training — computed in pandas.\n", - "online_df[\"return_rate\"] = (\n", - " online_df[\"total_returns\"] / online_df[\"total_orders\"].clip(lower=1)\n", - ")\n", - "online_df[\"return_risk\"] = online_df[\"return_rate\"] * online_df[\"avg_order_value\"]\n", - "\n", - 
"print(\"Online features (raw from Redis + locally derived columns):\")\n", + "print(\"Online features (raw from Redis + on-demand derived columns):\")\n", "online_df\n" ] }, @@ -623,12 +668,19 @@ "\n", "| Step | API | What happens |\n", "|------|-----|-------------|\n", - "| Define | Python objects (Entity, FeatureView) | Declare what features exist |\n", + "| Define | Python objects (Entity, FeatureView, FeatureService) | Declare what features exist and how they are grouped |\n", "| Register | `store.apply([...])` | Write definitions to the **remote registry** served by the Feast Operator |\n", - "| Train | `store.get_historical_features()` | Point-in-time join from parquet |\n", + "| Train | `store.get_historical_features(features=service)` | Point-in-time join from parquet |\n", "| Preprocess | pandas / sklearn | Derive columns, filter, clean, normalize |\n", "| Materialize | `store.materialize()` | Push latest raw values to Redis |\n", - "| Serve | `store.get_online_features()` | Sub-ms lookup from Redis |\n", + "| Serve | `store.get_online_features(features=service)` | Sub-ms lookup from Redis |\n", + "\n", + "### FeatureService\n", + "\n", + "A `FeatureService` bundles one or more feature views under a single name.\n", + "Clients (notebooks, inference services) reference the service name instead\n", + "of listing individual feature names — they don't need to know the internal\n", + "view structure. Define once, use everywhere.\n", "\n", "### About the registry\n", "\n", From 00e95d28567ec075e8744780a4f9c446b618f154 Mon Sep 17 00:00:00 2001 From: Igor Kvachenok Date: Tue, 5 May 2026 11:50:51 +0200 Subject: [PATCH 16/22] Fix istio workaround and ODFV hang for remote registry - feast-cr.yaml: use excludeInboundPorts: "6570" instead of disabling sidecar injection; the three-part fix (annotation + alt-Service + DestinationRule) preserves mTLS on the feast-server pod for all other ports. 
- feast-istio-workaround.yaml: new file with the alt-Service (appProtocol: grpc) and DestinationRule (tls: DISABLE) templates. - feast_example.ipynb: restore on-demand feature view (customer_risk_features) and add PandasTransformation.from_proto patch that bypasses dill+typeguard hang by injecting the live UDF by name (confirmed working in 0.6 s on cluster). - README.md: update istio section to describe three-part fix; remove the "no ODFV" limitation entry. --- feast/README.md | 44 +++++++++++---------- feast/feast-cr.yaml | 10 ++--- feast/feast-istio-workaround.yaml | 63 +++++++++++++++++++++++++++++++ feast/feast_example.ipynb | 30 +++++++++++++++ 4 files changed, 123 insertions(+), 24 deletions(-) create mode 100644 feast/feast-istio-workaround.yaml diff --git a/feast/README.md b/feast/README.md index 71550a2..71ff711 100644 --- a/feast/README.md +++ b/feast/README.md @@ -63,12 +63,23 @@ kubectl get featurestore -n -w # wait until Ready This CR enables the **registry gRPC server** (`services.registry.local.server`) so the notebook can read and write feature definitions remotely. It also sets -`sidecar.istio.io/inject: "false"` on the feast-server pod — the registry only -carries feature *metadata*, and istio's protocol detection mis-classifies the -operator's registry Service as HTTP/1.1, breaking gRPC. See "Known limitations" -below for details. +`traffic.sidecar.istio.io/excludeInboundPorts: "6570"` on the feast-server +pod — one part of the three-part istio workaround described under "Known +limitations" below. -### 4. Run the notebook +### 4. Apply the istio workaround + +The operator-generated Service has port name `http` and no `appProtocol`, +so istio mis-classifies gRPC traffic as HTTP/1.1 and breaks it. 
The fix +requires three pieces — the pod annotation in `feast-cr.yaml` plus an +alt-Service and a DestinationRule from `feast-istio-workaround.yaml`: + +```bash +sed 's/<name>/my-store/g; s/<namespace>/<your-namespace>/g' \ + feast-istio-workaround.yaml | kubectl apply -f - +``` + +### 5. Run the notebook Open `feast_example.ipynb` in your Kubeflow notebook. The first cell auto-discovers the FeatureStore CR in the current namespace, reads the @@ -82,6 +93,7 @@ registry and the local Redis online store. |------|------------| | `redis-cr.yaml` | Kubernetes manifest — deploys a Redis instance (OpsTree operator) | | `feast-cr.yaml` | Kubernetes manifest — deploys the FeatureStore CR with registry server enabled | +| `feast-istio-workaround.yaml` | Kubernetes manifests — alt-Service + DestinationRule for istio gRPC fix | | `feature_store.yaml` | Feast SDK config template — the notebook generates this automatically | | `feast_example.ipynb` | End-to-end notebook: retail return prediction with Feast | @@ -132,19 +144,13 @@ Feast has three stores. Here is what each one does and which backend prokube use ## Known limitations -- **No on-demand feature views.** Feast 0.63 has a bug where ODFVs round-tripped - through a remote registry hang on invocation (the deserialized UDF object - gets stuck somewhere in the typeguard-instrumented code path). Until that's - fixed upstream, the notebook computes derived columns in plain pandas after - `get_historical_features` / `get_online_features`. Switch to a local registry - if you need ODFVs. -- **Istio sidecar disabled on the feast-server pod.** The operator generates - the registry Service with port name `http` and no `appProtocol`, so istio - mis-classifies gRPC traffic and breaks it. The simplest fix — used in - `feast-cr.yaml` — is `sidecar.istio.io/inject: "false"` on the feast-server - pod. The registry only carries feature *metadata* (entity schemas, feature - view names, data source paths), so the impact is small.
Feature *values* in - Redis and on the offline-store PVC are unaffected. Rely on NetworkPolicy at - the namespace level for cross-namespace isolation. +- **Istio gRPC workaround required.** The operator generates the registry + Service with port name `http` and no `appProtocol`, causing istio to + mis-classify gRPC traffic as HTTP/1.1. Three pieces are required: + (1) `traffic.sidecar.istio.io/excludeInboundPorts: "6570"` in `feast-cr.yaml` + to exclude the registry port from sidecar inbound interception, + (2) an alt-Service with `appProtocol: grpc` so the client-side envoy sends + HTTP/2, and (3) a DestinationRule with `tls: DISABLE` so the client envoy + skips mTLS. All three are bundled in `feast-istio-workaround.yaml`. - **Notebook RBAC** for FeatureStore CRs must be granted by the platform. On prokube this is already in place. diff --git a/feast/feast-cr.yaml b/feast/feast-cr.yaml index 229e78c..75fe59f 100644 --- a/feast/feast-cr.yaml +++ b/feast/feast-cr.yaml @@ -20,12 +20,12 @@ spec: securityContext: runAsUser: 0 # The operator-generated registry Service uses port name "http" with no - # appProtocol, which makes istio mis-classify gRPC traffic as HTTP/1.1 - # and break it. The registry only carries feature *metadata* (schemas, - # data source paths), not feature values, so the simplest fix is to skip - # sidecar injection on the feast-server pod. + # appProtocol, causing istio to mis-classify gRPC traffic as HTTP/1.1. + # Fix: exclude port 6570 from sidecar inbound interception so plain h2 + # from the alt-Service reaches the container directly. See README for the + # required alt-Service + DestinationRule (feast-istio-workaround.yaml). 
podAnnotations: - sidecar.istio.io/inject: "false" + traffic.sidecar.istio.io/excludeInboundPorts: "6570" registry: local: # Expose the registry as a gRPC server so notebooks/clients can read diff --git a/feast/feast-istio-workaround.yaml b/feast/feast-istio-workaround.yaml new file mode 100644 index 0000000..400ea54 --- /dev/null +++ b/feast/feast-istio-workaround.yaml @@ -0,0 +1,63 @@ +# Istio workaround for the Feast registry gRPC service. +# +# Problem +# ------- +# The operator creates a Service for the registry with port name "http" and no +# appProtocol. Istio's protocol detection then classifies traffic as HTTP/1.1, +# which breaks gRPC (which requires HTTP/2). +# +# Three-part fix +# -------------- +# 1. feast-cr.yaml: add `traffic.sidecar.istio.io/excludeInboundPorts: "6570"` +# to the feast-server pod so its sidecar does NOT intercept inbound traffic +# on the registry port — plain h2 reaches the container directly. +# +# 2. This file — alt-Service: a second Service pointing at the same feast-server +# pod but with `appProtocol: grpc` and port name `grpc`. This tells the +# *client-side* envoy to use gRPC (HTTP/2) when sending to the registry. +# +# 3. This file — DestinationRule: disable mTLS for the alt-Service so the +# client envoy sends plain gRPC (no TLS handshake) — compatible with a +# server-side sidecar that does not intercept port 6570. +# +# Usage +# ----- +# Replace <name> with your FeatureStore CR name and <namespace> with your +# Kubeflow profile namespace, then apply: +# +# sed 's/<name>/my-store/g; s/<namespace>/my-namespace/g' \ +# feast-istio-workaround.yaml | kubectl apply -f - +# +# In feature_store.yaml, point the registry at the alt-Service: +# +# registry: +# registry_type: remote +# path: grpc://feast-<name>-registry-grpc.<namespace>.svc.cluster.local:80 +# +# The operator-published feast-<name>-client ConfigMap uses the default Service +# (port 6570). The notebook overrides this with the alt-Service URL above.
+# --------------------------------------------------------------------------- +apiVersion: v1 +kind: Service +metadata: + name: feast-<name>-registry-grpc + namespace: <namespace> +spec: + selector: + app: feast-<name> # operator labels the feast-server pod with this + ports: + - name: grpc + port: 80 + targetPort: 6570 + appProtocol: grpc +--- +apiVersion: networking.istio.io/v1beta1 +kind: DestinationRule +metadata: + name: feast-<name>-registry-grpc-no-mtls + namespace: <namespace> +spec: + host: feast-<name>-registry-grpc.<namespace>.svc.cluster.local + trafficPolicy: + tls: + mode: DISABLE diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index 1fd5ed0..a48862c 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -348,6 +348,36 @@ " df[\"return_risk\"] = (df[\"return_rate\"] * inputs[\"avg_order_value\"]).astype(\"float32\")\n", " return df\n", "\n", + "\n", + "# ---------------------------------------------------------------------------\n", + "# Workaround: Feast ≤ 0.63 dill + typeguard hang on remote-registry ODFVs.\n", + "#\n", + "# When Feast fetches an ODFV from the remote gRPC registry it calls\n", + "# PandasTransformation.from_proto(), which calls dill.loads() to reconstruct\n", + "# the UDF. dill reconstructs the function's __globals__, re-importing feast\n", + "# modules and hitting typeguard's AST instrumentation — a deeply recursive\n", + "# traversal that hangs indefinitely.\n", + "#\n", + "# Fix: replace from_proto with a version that looks up the live function by\n", + "# name instead of deserializing via dill.
Register every UDF defined in this\n", + "# session below.\n", + "# ---------------------------------------------------------------------------\n", + "from feast.transformation.pandas_transformation import PandasTransformation as _PT\n", + "\n", + "_UDF_REGISTRY = {\n", + " \"customer_risk_features\": customer_risk_features.feature_transformation.udf,\n", + "}\n", + "\n", + "_orig_from_proto = _PT.from_proto.__func__\n", + "\n", + "@classmethod\n", + "def _fast_from_proto(cls, proto):\n", + " if proto.name in _UDF_REGISTRY:\n", + " return cls(udf=_UDF_REGISTRY[proto.name], udf_string=proto.body_text)\n", + " return _orig_from_proto(cls, proto)\n", + "\n", + "_PT.from_proto = _fast_from_proto\n", + "\n", "# ---------------------------------------------------------------------------\n", "# FeatureService: a named bundle of feature views.\n", "#\n", From 40bd4c50202373922e727cd3c2f7dc07e1cf1d24 Mon Sep 17 00:00:00 2001 From: Igor Kvachenok Date: Tue, 5 May 2026 13:07:45 +0200 Subject: [PATCH 17/22] Fix istio workaround explanation: both envoys are misconfigured The operator Service name:http misleads BOTH envoy sidecars, not just the client side. The server-side envoy classifies port 6570 as HTTP/1.1 and rejects gRPC (HTTP/2) with a protocol error. 
Restores the three-part fix with accurate comments explaining each piece: - excludeInboundPorts:6570 bypasses the misconfigured server-side envoy - alt-Service appProtocol:grpc fixes client-side protocol detection - tls:DISABLE because server-side envoy is bypassed, no mTLS termination --- feast/README.md | 32 ++++++++++--------- feast/feast-cr.yaml | 11 ++++--- feast/feast-istio-workaround.yaml | 53 ++++++++++++++++++------------- 3 files changed, 54 insertions(+), 42 deletions(-) diff --git a/feast/README.md b/feast/README.md index 71ff711..e64a196 100644 --- a/feast/README.md +++ b/feast/README.md @@ -62,17 +62,16 @@ kubectl get featurestore -n -w # wait until Ready ``` This CR enables the **registry gRPC server** (`services.registry.local.server`) -so the notebook can read and write feature definitions remotely. It also sets -`traffic.sidecar.istio.io/excludeInboundPorts: "6570"` on the feast-server -pod — one part of the three-part istio workaround described under "Known -limitations" below. +so the notebook can read and write feature definitions remotely. It also adds +`traffic.sidecar.istio.io/excludeInboundPorts: "6570"` — see "Known +limitations" for why this is required. ### 4. Apply the istio workaround The operator-generated Service has port name `http` and no `appProtocol`, -so istio mis-classifies gRPC traffic as HTTP/1.1 and breaks it. The fix -requires three pieces — the pod annotation in `feast-cr.yaml` plus an -alt-Service and a DestinationRule from `feast-istio-workaround.yaml`: +so istio mis-classifies gRPC traffic as HTTP/1.1 and breaks it. This +misleads **both** envoy sidecars — see `feast-istio-workaround.yaml` for +a full explanation. The fix requires three pieces: ```bash sed 's//my-store/g; s///g' \ @@ -144,13 +143,16 @@ Feast has three stores. 
Here is what each one does and which backend prokube use ## Known limitations -- **Istio gRPC workaround required.** The operator generates the registry - Service with port name `http` and no `appProtocol`, causing istio to - mis-classify gRPC traffic as HTTP/1.1. Three pieces are required: - (1) `traffic.sidecar.istio.io/excludeInboundPorts: "6570"` in `feast-cr.yaml` - to exclude the registry port from sidecar inbound interception, - (2) an alt-Service with `appProtocol: grpc` so the client-side envoy sends - HTTP/2, and (3) a DestinationRule with `tls: DISABLE` so the client envoy - skips mTLS. All three are bundled in `feast-istio-workaround.yaml`. +- **Istio gRPC workaround required.** The operator creates the registry Service + with `name: http` and no `appProtocol`. This misleads *both* envoy sidecars: + the client-side envoy downgrades to HTTP/1.1, and the server-side envoy + builds its inbound listener as HTTP/1.1 and rejects HTTP/2 with a protocol + error. The workaround bypasses the server-side envoy entirely + (`excludeInboundPorts: "6570"` in `feast-cr.yaml`), fixes the client-side + envoy via an alt-Service with `appProtocol: grpc`, and disables mTLS + (`tls: DISABLE` DestinationRule) since the server-side envoy is no longer + in the path to terminate it. All three are needed and explained in + `feast-istio-workaround.yaml`. Once the operator sets `appProtocol: grpc` + on its own Service upstream, all three workarounds become unnecessary. - **Notebook RBAC** for FeatureStore CRs must be granted by the platform. On prokube this is already in place. diff --git a/feast/feast-cr.yaml b/feast/feast-cr.yaml index 75fe59f..48ab975 100644 --- a/feast/feast-cr.yaml +++ b/feast/feast-cr.yaml @@ -19,11 +19,12 @@ spec: runFeastApplyOnInit: false securityContext: runAsUser: 0 - # The operator-generated registry Service uses port name "http" with no - # appProtocol, causing istio to mis-classify gRPC traffic as HTTP/1.1. 
- # Fix: exclude port 6570 from sidecar inbound interception so plain h2 - # from the alt-Service reaches the container directly. See README for the - # required alt-Service + DestinationRule (feast-istio-workaround.yaml). + # The operator-generated registry Service has port name "http" and no + # appProtocol. This misleads the SERVER-side envoy into classifying port + # 6570 as HTTP/1.1. Bypassing server-side envoy interception entirely + # (excludeInboundPorts) is the only workaround until the operator sets + # appProtocol: grpc on its own Service. See feast-istio-workaround.yaml + # for the client-side alt-Service + DestinationRule. podAnnotations: traffic.sidecar.istio.io/excludeInboundPorts: "6570" registry: diff --git a/feast/feast-istio-workaround.yaml b/feast/feast-istio-workaround.yaml index 400ea54..6812897 100644 --- a/feast/feast-istio-workaround.yaml +++ b/feast/feast-istio-workaround.yaml @@ -1,24 +1,35 @@ # Istio workaround for the Feast registry gRPC service. # -# Problem -# ------- -# The operator creates a Service for the registry with port name "http" and no -# appProtocol. Istio's protocol detection then classifies traffic as HTTP/1.1, -# which breaks gRPC (which requires HTTP/2). -# -# Three-part fix -# -------------- -# 1. feast-cr.yaml: add `traffic.sidecar.istio.io/excludeInboundPorts: "6570"` -# to the feast-server pod so its sidecar does NOT intercept inbound traffic -# on the registry port — plain h2 reaches the container directly. -# -# 2. This file — alt-Service: a second Service pointing at the same feast-server -# pod but with `appProtocol: grpc` and port name `grpc`. This tells the -# *client-side* envoy to use gRPC (HTTP/2) when sending to the registry. -# -# 3. This file — DestinationRule: disable mTLS for the alt-Service so the -# client envoy sends plain gRPC (no TLS handshake) — compatible with a -# server-side sidecar that does not intercept port 6570. 
+# Root cause +# ---------- +# The operator creates the registry Service with port name "http" and no +# appProtocol. This single omission misleads BOTH envoy sidecars: +# +# CLIENT-side envoy: sees "http" → classifies traffic as HTTP/1.1, +# downgrading gRPC (which requires HTTP/2). +# +# SERVER-side envoy: builds its inbound listener for port 6570 from the +# same Service → classifies port 6570 as HTTP/1.1 → receives gRPC +# (HTTP/2) → sends RST ("protocol error"). +# +# Three-part workaround (until operator sets appProtocol: grpc upstream) +# ----------------------------------------------------------------------- +# 1. feast-cr.yaml: excludeInboundPorts: "6570" +# Bypass the server-side envoy entirely for port 6570. Since the server +# envoy's inbound listener is misconfigured (HTTP/1.1), we skip it so +# gRPC reaches the feast container directly. +# +# 2. This file — alt-Service with appProtocol: grpc: +# Fix the CLIENT-side envoy's protocol detection so it sends HTTP/2. +# +# 3. This file — DestinationRule tls: DISABLE: +# Since the server-side envoy is bypassed (no mTLS termination), the +# client must not attempt mTLS. Plain HTTP/2 goes directly to the +# feast container. +# +# When the operator sets appProtocol: grpc on its own Service, all three +# workarounds become unnecessary: both envoys detect the protocol correctly +# and normal mTLS flows end-to-end. # # Usage # ----- @@ -34,8 +45,6 @@ # registry_type: remote # path: grpc://feast-<name>-registry-grpc.<namespace>.svc.cluster.local:80 # -# The operator-published feast-<name>-client ConfigMap uses the default Service -# (port 6570). The notebook overrides this with the alt-Service URL above. 
# --------------------------------------------------------------------------- apiVersion: v1 kind: Service @@ -54,7 +63,7 @@ spec: apiVersion: networking.istio.io/v1beta1 kind: DestinationRule metadata: - name: feast-<name>-registry-grpc-no-mtls + name: feast-<name>-registry-grpc namespace: <namespace> spec: host: feast-<name>-registry-grpc.<namespace>.svc.cluster.local From c9ee8c266b4efccdf9af44add3eba70dfd4b85b5 Mon Sep 17 00:00:00 2001 From: Igor Kvachenok Date: Fri, 22 May 2026 12:12:45 +0200 Subject: [PATCH 18/22] Merge v2 and v3 feast examples --- feast/README.md | 138 +++++++--------- feast/feast_example.ipynb | 150 ++++++++++++------ feast/feature_store.yaml | 19 --- feast/registry/local/README.md | 40 +++++ feast/registry/local/feast-cr.yaml | 54 +++++++ feast/registry/local/feature_store.yaml | 29 ++++ feast/registry/remote/README.md | 65 ++++++++ feast/{ => registry/remote}/feast-cr.yaml | 13 +- .../remote}/feast-istio-workaround.yaml | 0 feast/registry/remote/feature_store.yaml | 18 +++ 10 files changed, 369 insertions(+), 157 deletions(-) diff --git a/feast/README.md b/feast/README.md index e64a196..a691060 100644 --- a/feast/README.md +++ b/feast/README.md @@ -7,11 +7,6 @@ feature management in ML workflows. return their next order. The notebook walks through defining customer features, training a return-risk model, and serving predictions in real time. -The notebook talks to the **registry gRPC server** that the Feast Operator -exposes from the FeatureStore CR. 
Feature definitions you `apply()` from the -notebook persist on the operator-managed PVC and are visible to every other -client in the namespace. - ## Prerequisites - Feast must be enabled on your cluster (ask your admin) @@ -21,16 +16,12 @@ client in the namespace. ### 1. Deploy a Redis instance -Create a password secret and a Redis CR in your namespace: - ```bash -# Generate a random password kubectl create secret generic redis-feast \ -n \ --from-literal=password=$(openssl rand -base64 24 | tr -d '/') -# Deploy the Redis CR (edit namespace in redis-cr.yaml first) -kubectl apply -f redis-cr.yaml +kubectl apply -f redis-cr.yaml # edit namespace first kubectl get redis -n -w ``` @@ -52,107 +43,88 @@ kubectl create secret generic feast-redis-config \ rm /tmp/redis-config.yaml ``` -### 3. Deploy the FeatureStore +### 3. Choose a registry mode and deploy the FeatureStore -Edit `feast-cr.yaml` to set your namespace, then: +There are two registry modes. **Pick one:** +| | Local | Remote | +|---|---|---| +| **Registry** | SQLite SQL on `/tmp` (ephemeral) or PVC (persistent) | gRPC server on operator PVC (persistent, shared) | +| **ODFVs** | Work out of the box | Require a monkey-patch (Feast ≤ 0.63 bug) | +| **Istio workaround** | Not needed | Required until [feast#6367](https://github.com/feast-dev/feast/pull/6367) merges | +| **Good for** | Single user, quick iteration | Teams sharing definitions across clients | + +**Local:** ```bash -kubectl apply -f feast-cr.yaml -kubectl get featurestore -n -w # wait until Ready +kubectl apply -f registry/local/feast-cr.yaml # edit namespace first +kubectl get featurestore -n -w ``` -This CR enables the **registry gRPC server** (`services.registry.local.server`) -so the notebook can read and write feature definitions remotely. It also adds -`traffic.sidecar.istio.io/excludeInboundPorts: "6570"` — see "Known -limitations" for why this is required. - -### 4. 
Apply the istio workaround - -The operator-generated Service has port name `http` and no `appProtocol`, -so istio mis-classifies gRPC traffic as HTTP/1.1 and breaks it. This -misleads **both** envoy sidecars — see `feast-istio-workaround.yaml` for -a full explanation. The fix requires three pieces: - +**Remote:** ```bash +kubectl apply -f registry/remote/feast-cr.yaml # edit namespace first +kubectl get featurestore -n -w + +# Apply the Istio workaround (required for remote mode) sed 's//my-store/g; s///g' \ - feast-istio-workaround.yaml | kubectl apply -f - + registry/remote/feast-istio-workaround.yaml | kubectl apply -f - ``` -### 5. Run the notebook +### 4. Run the notebook -Open `feast_example.ipynb` in your Kubeflow notebook. The first cell -auto-discovers the FeatureStore CR in the current namespace, reads the -operator-published `feast--client` ConfigMap and the Redis secret it -references, then writes a `feature_store.yaml` that points at the remote -registry and the local Redis online store. +Open `feast_example.ipynb`. The first cell will ask which registry mode you +chose — select it there and run all cells. ## Files -| File | What it is | -|------|------------| -| `redis-cr.yaml` | Kubernetes manifest — deploys a Redis instance (OpsTree operator) | -| `feast-cr.yaml` | Kubernetes manifest — deploys the FeatureStore CR with registry server enabled | -| `feast-istio-workaround.yaml` | Kubernetes manifests — alt-Service + DestinationRule for istio gRPC fix | -| `feature_store.yaml` | Feast SDK config template — the notebook generates this automatically | -| `feast_example.ipynb` | End-to-end notebook: retail return prediction with Feast | - -### Why two Feast YAML files? - -`feast-cr.yaml` is a **Kubernetes resource** (`kind: FeatureStore`) that the -operator reads to provision PVCs, the Feast server pod, and the registry gRPC -service. You apply it once with `kubectl`. 
- -`feature_store.yaml` is a **Feast SDK config file** (fixed filename — Feast -convention) that the Python client and CLI read to know how to connect to the -registry and stores. The notebook builds it for you from the operator's -client ConfigMap; you don't edit it directly. +``` +feast/ + feast_example.ipynb End-to-end notebook (works with both modes) + redis-cr.yaml Deploys a Redis instance (OpsTree operator) + registry/ + local/ + feast-cr.yaml FeatureStore CR — local SQLite SQL registry + feature_store.yaml Feast SDK config template + README.md Local mode details and trade-offs + remote/ + feast-cr.yaml FeatureStore CR — remote gRPC registry server + feature_store.yaml Feast SDK config template + feast-istio-workaround.yaml Alt-Service + DestinationRule for Istio fix + README.md Remote mode details and when to retire the workaround +``` ## Architecture -Feast has three stores. Here is what each one does and which backend prokube uses: +Feast has three stores: -| Store | Purpose | Prokube default | Alternatives | -|-------|---------|-----------------|--------------| -| **Registry** | Feature definitions (entities, feature views, sources). Written on `apply()`, read at startup. | gRPC server backed by SQLite on PVC | PostgreSQL for multi-replica feast-server | -| **Online store** | Latest feature value per entity. Read on every inference — latency critical. | Redis (your `Redis` CR) | SQLite on PVC (dev/test only) | -| **Offline store** | Historical feature records for point-in-time joins during training. | Parquet/file on PVC | Dask (distributed); cloud warehouses | +| Store | Purpose | Backend | +|-------|---------|---------| +| **Registry** | Feature definitions (entities, feature views, sources). Written on `feast apply`. | Local: SQLite SQL file. Remote: gRPC server on operator PVC. | +| **Online store** | Latest feature value per entity. Read on every inference — latency critical. 
| Redis (your `Redis` CR) | +| **Offline store** | Historical feature records for point-in-time joins during training. | Parquet on PVC | ``` ┌──────────────────────────────────────┐ │ Your Namespace │ │ │ │ Redis CR (redis-feast) │ - │ - your private Redis instance │ │ │ - store.apply() ──gRPC──▶ Registry Server (Feast Operator)│ - (notebook) │ - feature definitions on PVC │ + store.apply() ───▶ Registry │ + (notebook) │ local: sqlite:////tmp/registry.db │ + │ remote: gRPC → operator PVC │ │ │ materialize ──────▶ Redis online store │ - │ - latest feature values │ - │ - sub-ms latency │ │ │ historical ──────▶ Parquet on PVC (offline store) │ - features │ - time-series feature data │ - │ │ - │ Feast Server pod │ - │ - registry gRPC :6570 │ - │ - online HTTP │ - │ - PVCs for registry & offline data │ + features │ │ └──────────────────────────────────────┘ ``` -## Known limitations - -- **Istio gRPC workaround required.** The operator creates the registry Service - with `name: http` and no `appProtocol`. This misleads *both* envoy sidecars: - the client-side envoy downgrades to HTTP/1.1, and the server-side envoy - builds its inbound listener as HTTP/1.1 and rejects HTTP/2 with a protocol - error. The workaround bypasses the server-side envoy entirely - (`excludeInboundPorts: "6570"` in `feast-cr.yaml`), fixes the client-side - envoy via an alt-Service with `appProtocol: grpc`, and disables mTLS - (`tls: DISABLE` DestinationRule) since the server-side envoy is no longer - in the path to terminate it. All three are needed and explained in - `feast-istio-workaround.yaml`. Once the operator sets `appProtocol: grpc` - on its own Service upstream, all three workarounds become unnecessary. -- **Notebook RBAC** for FeatureStore CRs must be granted by the platform. On - prokube this is already in place. 
+## Known limitations (remote mode only) + +The Feast Operator creates the registry Service with `name: http` and no +`appProtocol`, causing Istio to misclassify gRPC traffic as HTTP/1.1. The +three-part workaround in `registry/remote/feast-istio-workaround.yaml` +addresses this. Once [feast-dev/feast#6367](https://github.com/feast-dev/feast/pull/6367) +merges and you upgrade the operator, the workaround and the local mode +fallback both become unnecessary. diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index a48862c..1e3cf69 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -59,7 +59,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install -q 'feast[redis]' grpcio scikit-learn\n" + "!pip install -q 'feast[redis]' grpcio scikit-learn ipywidgets\n" ] }, { @@ -67,19 +67,42 @@ "id": "ef7e1942", "metadata": {}, "source": [ - "### Configure the Feast client\n", - "\n", - "Instead of building `feature_store.yaml` from scratch, we read the\n", - "operator-published `feast--client` **ConfigMap**. That file already\n", - "contains the right project name and a `registry_type: remote` pointing at\n", - "the registry gRPC server backed by a PVC — so feature definitions persist\n", - "across notebook restarts and are shared with everything else in the namespace.\n", - "\n", - "We then override the `online_store` with a direct Redis connection (read\n", - "from the secret referenced by the FeatureStore CR) so that `materialize()`\n", - "can write feature values from this notebook. 
In a fully production setup,\n", - "materialization runs as a server-side CronJob and the notebook would keep\n", - "the remote online store config too — see the *Production Setup* section.\n" + "### Choose registry mode\n", + "\n", + "Select how this notebook connects to the Feast registry.\n", + "See `registry/local/README.md` and `registry/remote/README.md` for the trade-offs.\n", + "\n", + "Run this cell, make your selection, then run the next cell.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "registry_choice", + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "from IPython.display import display\n", + "\n", + "registry_widget = widgets.RadioButtons(\n", + " options=[\n", + " (\n", + " \"Local — SQLite SQL on /tmp \"\n", + " \"(simpler, ODFVs work, ephemeral; use registry/local/feast-cr.yaml)\",\n", + " \"local\",\n", + " ),\n", + " (\n", + " \"Remote — operator gRPC server \"\n", + " \"(persistent, shared across clients; use registry/remote/feast-cr.yaml + istio workaround)\",\n", + " \"remote\",\n", + " ),\n", + " ],\n", + " description=\"Registry:\",\n", + " style={\"description_width\": \"initial\"},\n", + " layout=widgets.Layout(width=\"max-content\"),\n", + ")\n", + "display(registry_widget)\n" ] }, { @@ -95,6 +118,8 @@ "\n", "import yaml\n", "\n", + "REGISTRY_MODE = registry_widget.value # 'local' or 'remote'\n", + "\n", "\n", "def kubectl_json(*args):\n", " return json.loads(subprocess.check_output([\"kubectl\", *args, \"-o\", \"json\"]))\n", @@ -111,37 +136,54 @@ " ).decode().strip()\n", "\n", "\n", - "# 1. 
Find the FeatureStore CR in this namespace.\n", + "# Find the FeatureStore CR in this namespace (needed by both modes).\n", "fs_list = kubectl_json(\"get\", \"featurestore\")[\"items\"]\n", "if not fs_list:\n", - " raise RuntimeError(\"No FeatureStore CR found — apply feast-cr.yaml first.\")\n", + " raise RuntimeError(\n", + " \"No FeatureStore CR found — apply the feast-cr.yaml for your chosen mode first.\"\n", + " )\n", "fs = fs_list[0]\n", "fs_name = fs[\"metadata\"][\"name\"]\n", - "client_cm_name = fs[\"status\"][\"clientConfigMap\"]\n", "redis_secret_name = (\n", " fs[\"spec\"][\"services\"][\"onlineStore\"][\"persistence\"][\"store\"][\"secretRef\"][\"name\"]\n", ")\n", - "redis_secret_key = (\n", - " fs[\"spec\"][\"services\"][\"onlineStore\"][\"persistence\"][\"store\"].get(\"secretKeyName\", \"redis\")\n", + "redis_secret_key = fs[\"spec\"][\"services\"][\"onlineStore\"][\"persistence\"][\"store\"].get(\n", + " \"secretKeyName\", \"redis\"\n", ")\n", - "\n", - "# 2. Read the operator-published client config (project + remote registry).\n", - "# feast-cr.yaml disables the istio sidecar on the feast-server pod\n", - "# (sidecar.istio.io/inject: \"false\") so gRPC reaches the registry directly\n", - "# via the operator's default Service.\n", - "client_cm = kubectl_json(\"get\", \"cm\", client_cm_name)\n", - "config = yaml.safe_load(client_cm[\"data\"][\"feature_store.yaml\"])\n", - "\n", - "# 3. Read the Redis connection string from the secret referenced by the CR.\n", "redis_secret = kubectl_json(\"get\", \"secret\", redis_secret_name)\n", "redis_yaml = base64.b64decode(redis_secret[\"data\"][redis_secret_key]).decode()\n", "redis_conn = yaml.safe_load(redis_yaml)[\"connection_string\"]\n", "\n", - "# 4. Override online_store with a direct Redis connection so materialize()\n", - "# works from this notebook. 
Add a local-file offline store for the parquet\n", - "# we generate below.\n", - "config[\"online_store\"] = {\"type\": \"redis\", \"connection_string\": redis_conn}\n", - "config[\"offline_store\"] = {\"type\": \"file\"}\n", + "if REGISTRY_MODE == \"remote\":\n", + " # Read the operator-published ConfigMap — it already has project name\n", + " # and registry_type: remote pointing at the gRPC server.\n", + " client_cm_name = fs[\"status\"][\"clientConfigMap\"]\n", + " client_cm = kubectl_json(\"get\", \"cm\", client_cm_name)\n", + " config = yaml.safe_load(client_cm[\"data\"][\"feature_store.yaml\"])\n", + " # Override online/offline store so materialize() works from this notebook.\n", + " config[\"online_store\"] = {\"type\": \"redis\", \"connection_string\": redis_conn}\n", + " config[\"offline_store\"] = {\"type\": \"file\"}\n", + " registry_info = config[\"registry\"][\"path\"]\n", + "else: # local\n", + " # Build feature_store.yaml with a local SQLite SQL registry.\n", + " # SQLite SQL has proper transactional semantics vs the plain file registry\n", + " # (matters when materializing multiple feature views concurrently).\n", + " # The Feast operator CRD does not expose registry_type: sql, so this is\n", + " # configured in the SDK config rather than the FeatureStore CR.\n", + " config = {\n", + " \"project\": \"retail_features\",\n", + " \"provider\": \"local\",\n", + " \"offline_store\": {\"type\": \"file\"},\n", + " \"online_store\": {\"type\": \"redis\", \"connection_string\": redis_conn},\n", + " \"registry\": {\n", + " \"registry_type\": \"sql\",\n", + " \"path\": \"sqlite:////tmp/registry.db\",\n", + " \"cache_ttl_seconds\": 60,\n", + " },\n", + " \"auth\": {\"type\": \"no_auth\"},\n", + " \"entity_key_serialization_version\": 3,\n", + " }\n", + " registry_info = \"sqlite:////tmp/registry.db (ephemeral — re-run apply() after pod restart)\"\n", "\n", "with open(\"feature_store.yaml\", \"w\") as f:\n", " yaml.safe_dump(config, f, sort_keys=False)\n", @@ 
-149,11 +191,12 @@ "NAMESPACE = get_namespace()\n", "FEAST_PROJECT = config[\"project\"]\n", "\n", - "print(f\"FeatureStore CR: {fs_name}\")\n", - "print(f\"Project: {FEAST_PROJECT}\")\n", - "print(f\"Namespace: {NAMESPACE}\")\n", - "print(f\"Registry (remote): {config['registry']['path']}\")\n", - "print(f\"Online store: redis @ {redis_conn.split(',')[0]}\")\n", + "print(f\"Registry mode: {REGISTRY_MODE}\")\n", + "print(f\"FeatureStore CR: {fs_name}\")\n", + "print(f\"Project: {FEAST_PROJECT}\")\n", + "print(f\"Namespace: {NAMESPACE}\")\n", + "print(f\"Registry: {registry_info}\")\n", + "print(f\"Online store: redis @ {redis_conn.split(',')[0]}\")\n", "print(\"\\nfeature_store.yaml written.\")\n" ] }, @@ -350,7 +393,7 @@ "\n", "\n", "# ---------------------------------------------------------------------------\n", - "# Workaround: Feast ≤ 0.63 dill + typeguard hang on remote-registry ODFVs.\n", + "# Workaround (remote mode only): Feast ≤ 0.63 dill + typeguard hang.\n", "#\n", "# When Feast fetches an ODFV from the remote gRPC registry it calls\n", "# PandasTransformation.from_proto(), which calls dill.loads() to reconstruct\n", @@ -359,24 +402,27 @@ "# traversal that hangs indefinitely.\n", "#\n", "# Fix: replace from_proto with a version that looks up the live function by\n", - "# name instead of deserializing via dill. 
Register every UDF defined in this\n", - "# session below.\n", + "# name instead of deserializing via dill.\n", + "#\n", + "# Not needed in local mode (ODFV UDF is never serialized via dill).\n", + "# Remove once feast-dev/feast#6367 merges and the operator is upgraded.\n", "# ---------------------------------------------------------------------------\n", - "from feast.transformation.pandas_transformation import PandasTransformation as _PT\n", + "if REGISTRY_MODE == \"remote\":\n", + " from feast.transformation.pandas_transformation import PandasTransformation as _PT\n", "\n", - "_UDF_REGISTRY = {\n", - " \"customer_risk_features\": customer_risk_features.feature_transformation.udf,\n", - "}\n", + " _UDF_REGISTRY = {\n", + " \"customer_risk_features\": customer_risk_features.feature_transformation.udf,\n", + " }\n", "\n", - "_orig_from_proto = _PT.from_proto.__func__\n", + " _orig_from_proto = _PT.from_proto.__func__\n", "\n", - "@classmethod\n", - "def _fast_from_proto(cls, proto):\n", - " if proto.name in _UDF_REGISTRY:\n", - " return cls(udf=_UDF_REGISTRY[proto.name], udf_string=proto.body_text)\n", - " return _orig_from_proto(cls, proto)\n", + " @classmethod\n", + " def _fast_from_proto(cls, proto):\n", + " if proto.name in _UDF_REGISTRY:\n", + " return cls(udf=_UDF_REGISTRY[proto.name], udf_string=proto.body_text)\n", + " return _orig_from_proto(cls, proto)\n", "\n", - "_PT.from_proto = _fast_from_proto\n", + " _PT.from_proto = _fast_from_proto\n", "\n", "# ---------------------------------------------------------------------------\n", "# FeatureService: a named bundle of feature views.\n", diff --git a/feast/feature_store.yaml b/feast/feature_store.yaml deleted file mode 100644 index cb0d1d5..0000000 --- a/feast/feature_store.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Template — fill in your Redis connection details. -# Get the Redis host and password from your admin. 
-# -# For workflows running inside the cluster, you can use /tmp/registry.db -# as the registry path (ephemeral, single-run). For persistent registry -# access, mount the registry PVC and use /data/registry/registry.db. -project: retail_features -provider: local -offline_store: - type: file -online_store: - type: redis - connection_string: ":6379,password=" -registry: - registry_type: file - path: /tmp/registry.db -auth: - type: no_auth -entity_key_serialization_version: 3 diff --git a/feast/registry/local/README.md b/feast/registry/local/README.md new file mode 100644 index 0000000..40f13a9 --- /dev/null +++ b/feast/registry/local/README.md @@ -0,0 +1,40 @@ +# Local registry mode + +The notebook talks to the registry via a **local SQLite SQL file** written +directly by the Feast SDK — no gRPC server involved. + +## How it works + +- `feast apply` writes feature definitions to `sqlite:////tmp/registry.db` + (or a mounted PVC path — see `feature_store.yaml`). +- The registry is read back from the same file. No network hop, no Istio + concerns, no protocol negotiation. +- ODFVs (on-demand feature views) work without workarounds. 
+ +## Trade-offs vs remote mode + +| | Local (this folder) | Remote | +|---|---|---| +| Registry persistence | Ephemeral (`/tmp`) by default | Persistent on operator PVC | +| Shared across clients | No — each notebook has its own `/tmp` | Yes — all clients in the namespace see the same definitions | +| ODFVs | Work out of the box | Require a monkey-patch workaround (Feast ≤ 0.63 bug) | +| Istio workaround | Not needed | Required (3-part) | +| Setup complexity | Low | Higher | + +## When to use + +Use local mode when: +- You are the only user of this feature store +- You want ODFVs without workarounds +- You are experimenting or iterating quickly + +Switch to remote mode when feast-dev/feast#6367 is merged (operator sets +`appProtocol: grpc` on its own Service) — at that point the Istio workaround +collapses and remote becomes the clear default. + +## Files + +| File | Purpose | +|------|---------| +| `feast-cr.yaml` | FeatureStore CR — no `server: {}`, registry PVC only | +| `feature_store.yaml` | Feast SDK config template (notebook writes this) | diff --git a/feast/registry/local/feast-cr.yaml b/feast/registry/local/feast-cr.yaml new file mode 100644 index 0000000..5efd0ae --- /dev/null +++ b/feast/registry/local/feast-cr.yaml @@ -0,0 +1,54 @@ +# FeatureStore CR — local registry mode. +# +# The operator will create: +# - A Feast deployment + service (online feature server) +# - PVCs for the registry and offline data store +# - A ConfigMap (feast--client) with client connection info +# +# The registry PVC is provisioned but the notebook talks to it directly +# via a local SQLite SQL file (not through the gRPC server). No Istio +# workaround is needed. 
+# +# Prerequisites: +# - feast-redis-config secret must exist in your namespace (see README) +apiVersion: feast.dev/v1 +kind: FeatureStore +metadata: + name: my-store + namespace: # <-- change this +spec: + feastProject: retail_features + services: + runFeastApplyOnInit: false + securityContext: + runAsUser: 0 + registry: + local: + # No server: {} here — the notebook uses the SQLite file directly. + persistence: + file: + pvc: + mountPath: /data/registry + create: + # storageClassName: default # omit to use cluster default + resources: + requests: + storage: 1Gi + offlineStore: + persistence: + file: + type: file + pvc: + mountPath: /data/offline + create: + storageClassName: mayastor-no-redundancy # adjust for your cluster + resources: + requests: + storage: 10Gi + onlineStore: + persistence: + store: + type: redis + secretRef: + name: feast-redis-config + secretKeyName: redis diff --git a/feast/registry/local/feature_store.yaml b/feast/registry/local/feature_store.yaml new file mode 100644 index 0000000..95145d2 --- /dev/null +++ b/feast/registry/local/feature_store.yaml @@ -0,0 +1,29 @@ +# Registry: local SQLite SQL on /tmp (ephemeral) or PVC (persistent). +# +# SQLite SQL gives proper transactional semantics over the plain file registry +# with identical maintenance burden — no server, just a file. +# +# Path options: +# Ephemeral (survives the notebook session, lost on pod restart): +# sqlite:////tmp/registry.db +# Persistent (mount the registry PVC at /data/registry first): +# sqlite:////data/registry/registry.db +# +# Note: four slashes = absolute path (SQLAlchemy convention for SQLite). +# PVC name: feast--registry +# +# The notebook writes this file automatically — you don't need to edit it. 
+project: retail_features +provider: local +offline_store: + type: file +online_store: + type: redis + connection_string: ":6379,password=" +registry: + registry_type: sql + path: sqlite:////tmp/registry.db + cache_ttl_seconds: 60 +auth: + type: no_auth +entity_key_serialization_version: 3 diff --git a/feast/registry/remote/README.md b/feast/registry/remote/README.md new file mode 100644 index 0000000..ff2a4ee --- /dev/null +++ b/feast/registry/remote/README.md @@ -0,0 +1,65 @@ +# Remote registry mode + +The notebook talks to the registry via the **gRPC server** that the Feast +Operator exposes from the FeatureStore CR. Feature definitions you `apply()` +persist on the operator-managed PVC and are visible to every other client in +the namespace. + +## How it works + +- `feast apply` sends definitions to the registry gRPC server over the + alt-Service created by `feast-istio-workaround.yaml`. +- All clients in the namespace share the same registry — no need to re-run + `apply()` after a notebook restart. +- The operator publishes a `feast--client` ConfigMap with the + connection details; the notebook reads it automatically. + +## Trade-offs vs local mode + +| | Remote (this folder) | Local | +|---|---|---| +| Registry persistence | Persistent on operator PVC | Ephemeral (`/tmp`) by default | +| Shared across clients | Yes | No | +| ODFVs | Require a monkey-patch (Feast ≤ 0.63 bug with dill+typeguard) | Work out of the box | +| Istio workaround | Required (3-part) | Not needed | +| Setup complexity | Higher | Low | + +## When to remove this workaround + +This mode requires `feast-istio-workaround.yaml` because the Feast Operator +creates the registry Service with `name: http` and no `appProtocol`, causing +Istio to misclassify gRPC traffic as HTTP/1.1. 
+ +Once [feast-dev/feast#6367](https://github.com/feast-dev/feast/pull/6367) is +merged and you upgrade the operator, the workaround becomes unnecessary: +- Remove `feast-istio-workaround.yaml` and its `kubectl apply` step +- Remove the `podAnnotations` block from `feast-cr.yaml` +- Remove the `PandasTransformation.from_proto` monkey-patch from the notebook + +At that point, remote mode becomes the clear default and local mode can be +retired. + +## Setup + +Follow the top-level README through the Redis and `feast-redis-config` steps, +then: + +```bash +# 1. Deploy the FeatureStore CR +kubectl apply -f registry/remote/feast-cr.yaml +kubectl get featurestore -n -w # wait until Ready + +# 2. Apply the Istio workaround +sed 's//my-store/g; s///g' \ + registry/remote/feast-istio-workaround.yaml | kubectl apply -f - +``` + +Then open the notebook and select **Remote** when prompted. + +## Files + +| File | Purpose | +|------|---------| +| `feast-cr.yaml` | FeatureStore CR with `server: {}` and Istio pod annotation | +| `feature_store.yaml` | Feast SDK config template (notebook writes this from the operator ConfigMap) | +| `feast-istio-workaround.yaml` | Alt-Service + DestinationRule to fix Istio gRPC misclassification | diff --git a/feast/feast-cr.yaml b/feast/registry/remote/feast-cr.yaml similarity index 80% rename from feast/feast-cr.yaml rename to feast/registry/remote/feast-cr.yaml index 48ab975..fb23f62 100644 --- a/feast/feast-cr.yaml +++ b/feast/registry/remote/feast-cr.yaml @@ -1,13 +1,20 @@ -# Example FeatureStore CR for prokube. -# Edit the namespace to match your Kubeflow profile. +# FeatureStore CR — remote registry mode. 
# # The operator will create: # - A Feast deployment + service (online feature server) # - PVCs for the SQLite registry and offline data store # - A ConfigMap (feast--client) with client connection info # +# This CR enables the registry gRPC server (services.registry.local.server) +# so notebooks and other clients can read and write feature definitions +# remotely. Definitions persist on the operator-managed PVC across restarts +# and are shared with every client in the namespace. +# +# Requires the Istio workaround in feast-istio-workaround.yaml — see the +# Known Limitations section in registry/remote/README.md. +# # Prerequisites: -# - feast-redis-config secret must exist in your namespace (see README) +# - feast-redis-config secret must exist in your namespace (see top-level README) apiVersion: feast.dev/v1 kind: FeatureStore metadata: diff --git a/feast/feast-istio-workaround.yaml b/feast/registry/remote/feast-istio-workaround.yaml similarity index 100% rename from feast/feast-istio-workaround.yaml rename to feast/registry/remote/feast-istio-workaround.yaml diff --git a/feast/registry/remote/feature_store.yaml b/feast/registry/remote/feature_store.yaml new file mode 100644 index 0000000..8d17597 --- /dev/null +++ b/feast/registry/remote/feature_store.yaml @@ -0,0 +1,18 @@ +# Registry: remote gRPC server managed by the Feast Operator. +# +# The notebook reads this from the operator-published ConfigMap +# (feast--client) and overrides online_store with a direct Redis +# connection for materialization. You don't need to edit this file manually. 
+project: retail_features +provider: local +offline_store: + type: file +online_store: + type: redis + connection_string: ":6379,password=" +registry: + registry_type: remote + path: grpc://feast-my-store-registry-grpc..svc.cluster.local:80 +auth: + type: no_auth +entity_key_serialization_version: 3 From a569015c31f6cef81015a1017dc1dece364479ed Mon Sep 17 00:00:00 2001 From: Igor Kvachenok Date: Mon, 15 Jun 2026 15:31:31 +0200 Subject: [PATCH 19/22] feast: remove Istio workarounds, simplify remote mode for operator 0.64 Operator 0.64 sets appProtocol: grpc on the registry Service, fixing the protocol misclassification that required the three-part Istio workaround. - Delete feast-istio-workaround.yaml (alt-Service + DestinationRule) - Remove excludeInboundPorts pod annotation from feast-cr.yaml - Remove PandasTransformation.from_proto monkey-patch from notebook - Update registry URL in feature_store.yaml to native operator service - Reframe network-policies.yaml as defence-in-depth (no longer required to compensate for sidecar bypass) - Strip all workaround documentation from READMEs and notebook widget --- feast/README.md | 19 +---- feast/feast_example.ipynb | 34 +-------- feast/registry/local/README.md | 12 ++-- feast/registry/local/feast-cr.yaml | 3 +- feast/registry/remote/README.md | 27 +------ feast/registry/remote/feast-cr.yaml | 11 --- .../remote/feast-istio-workaround.yaml | 72 ------------------- feast/registry/remote/feature_store.yaml | 2 +- feast/registry/remote/network-policies.yaml | 57 +++++++++++++++ 9 files changed, 69 insertions(+), 168 deletions(-) delete mode 100644 feast/registry/remote/feast-istio-workaround.yaml create mode 100644 feast/registry/remote/network-policies.yaml diff --git a/feast/README.md b/feast/README.md index a691060..8d78af8 100644 --- a/feast/README.md +++ b/feast/README.md @@ -50,8 +50,6 @@ There are two registry modes. 
**Pick one:** | | Local | Remote | |---|---|---| | **Registry** | SQLite SQL on `/tmp` (ephemeral) or PVC (persistent) | gRPC server on operator PVC (persistent, shared) | -| **ODFVs** | Work out of the box | Require a monkey-patch (Feast ≤ 0.63 bug) | -| **Istio workaround** | Not needed | Required until [feast#6367](https://github.com/feast-dev/feast/pull/6367) merges | | **Good for** | Single user, quick iteration | Teams sharing definitions across clients | **Local:** @@ -64,10 +62,6 @@ kubectl get featurestore -n -w ```bash kubectl apply -f registry/remote/feast-cr.yaml # edit namespace first kubectl get featurestore -n -w - -# Apply the Istio workaround (required for remote mode) -sed 's//my-store/g; s///g' \ - registry/remote/feast-istio-workaround.yaml | kubectl apply -f - ``` ### 4. Run the notebook @@ -89,8 +83,8 @@ feast/ remote/ feast-cr.yaml FeatureStore CR — remote gRPC registry server feature_store.yaml Feast SDK config template - feast-istio-workaround.yaml Alt-Service + DestinationRule for Istio fix - README.md Remote mode details and when to retire the workaround + network-policies.yaml CNI-layer NetworkPolicies for isolation + README.md Remote mode details and trade-offs ``` ## Architecture @@ -119,12 +113,3 @@ Feast has three stores: features │ │ └──────────────────────────────────────┘ ``` - -## Known limitations (remote mode only) - -The Feast Operator creates the registry Service with `name: http` and no -`appProtocol`, causing Istio to misclassify gRPC traffic as HTTP/1.1. The -three-part workaround in `registry/remote/feast-istio-workaround.yaml` -addresses this. Once [feast-dev/feast#6367](https://github.com/feast-dev/feast/pull/6367) -merges and you upgrade the operator, the workaround and the local mode -fallback both become unnecessary. 
diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index 1e3cf69..e084efd 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -94,7 +94,7 @@ " ),\n", " (\n", " \"Remote — operator gRPC server \"\n", - " \"(persistent, shared across clients; use registry/remote/feast-cr.yaml + istio workaround)\",\n", + " \"(persistent, shared across clients; use registry/remote/feast-cr.yaml)\",\n", " \"remote\",\n", " ),\n", " ],\n", @@ -393,38 +393,6 @@ "\n", "\n", "# ---------------------------------------------------------------------------\n", - "# Workaround (remote mode only): Feast ≤ 0.63 dill + typeguard hang.\n", - "#\n", - "# When Feast fetches an ODFV from the remote gRPC registry it calls\n", - "# PandasTransformation.from_proto(), which calls dill.loads() to reconstruct\n", - "# the UDF. dill reconstructs the function's __globals__, re-importing feast\n", - "# modules and hitting typeguard's AST instrumentation — a deeply recursive\n", - "# traversal that hangs indefinitely.\n", - "#\n", - "# Fix: replace from_proto with a version that looks up the live function by\n", - "# name instead of deserializing via dill.\n", - "#\n", - "# Not needed in local mode (ODFV UDF is never serialized via dill).\n", - "# Remove once feast-dev/feast#6367 merges and the operator is upgraded.\n", - "# ---------------------------------------------------------------------------\n", - "if REGISTRY_MODE == \"remote\":\n", - " from feast.transformation.pandas_transformation import PandasTransformation as _PT\n", - "\n", - " _UDF_REGISTRY = {\n", - " \"customer_risk_features\": customer_risk_features.feature_transformation.udf,\n", - " }\n", - "\n", - " _orig_from_proto = _PT.from_proto.__func__\n", - "\n", - " @classmethod\n", - " def _fast_from_proto(cls, proto):\n", - " if proto.name in _UDF_REGISTRY:\n", - " return cls(udf=_UDF_REGISTRY[proto.name], udf_string=proto.body_text)\n", - " return _orig_from_proto(cls, proto)\n", - "\n", - " _PT.from_proto = 
_fast_from_proto\n", - "\n", - "# ---------------------------------------------------------------------------\n", "# FeatureService: a named bundle of feature views.\n", "#\n", "# Instead of listing individual feature names in every get_online_features()\n", diff --git a/feast/registry/local/README.md b/feast/registry/local/README.md index 40f13a9..2e73b13 100644 --- a/feast/registry/local/README.md +++ b/feast/registry/local/README.md @@ -7,8 +7,8 @@ directly by the Feast SDK — no gRPC server involved. - `feast apply` writes feature definitions to `sqlite:////tmp/registry.db` (or a mounted PVC path — see `feature_store.yaml`). -- The registry is read back from the same file. No network hop, no Istio - concerns, no protocol negotiation. +- The registry is read back from the same file. No network hop, no protocol + negotiation. - ODFVs (on-demand feature views) work without workarounds. ## Trade-offs vs remote mode @@ -17,20 +17,16 @@ directly by the Feast SDK — no gRPC server involved. |---|---|---| | Registry persistence | Ephemeral (`/tmp`) by default | Persistent on operator PVC | | Shared across clients | No — each notebook has its own `/tmp` | Yes — all clients in the namespace see the same definitions | -| ODFVs | Work out of the box | Require a monkey-patch workaround (Feast ≤ 0.63 bug) | -| Istio workaround | Not needed | Required (3-part) | | Setup complexity | Low | Higher | ## When to use Use local mode when: - You are the only user of this feature store -- You want ODFVs without workarounds - You are experimenting or iterating quickly -Switch to remote mode when feast-dev/feast#6367 is merged (operator sets -`appProtocol: grpc` on its own Service) — at that point the Istio workaround -collapses and remote becomes the clear default. +Use remote mode when you need definitions to persist across pod restarts or +be shared with other clients in the namespace. 
## Files diff --git a/feast/registry/local/feast-cr.yaml b/feast/registry/local/feast-cr.yaml index 5efd0ae..02519e6 100644 --- a/feast/registry/local/feast-cr.yaml +++ b/feast/registry/local/feast-cr.yaml @@ -6,8 +6,7 @@ # - A ConfigMap (feast--client) with client connection info # # The registry PVC is provisioned but the notebook talks to it directly -# via a local SQLite SQL file (not through the gRPC server). No Istio -# workaround is needed. +# via a local SQLite SQL file (not through the gRPC server). # # Prerequisites: # - feast-redis-config secret must exist in your namespace (see README) diff --git a/feast/registry/remote/README.md b/feast/registry/remote/README.md index ff2a4ee..e6f0c6f 100644 --- a/feast/registry/remote/README.md +++ b/feast/registry/remote/README.md @@ -8,7 +8,7 @@ the namespace. ## How it works - `feast apply` sends definitions to the registry gRPC server over the - alt-Service created by `feast-istio-workaround.yaml`. + operator's native registry Service. - All clients in the namespace share the same registry — no need to re-run `apply()` after a notebook restart. - The operator publishes a `feast--client` ConfigMap with the @@ -20,25 +20,8 @@ the namespace. |---|---|---| | Registry persistence | Persistent on operator PVC | Ephemeral (`/tmp`) by default | | Shared across clients | Yes | No | -| ODFVs | Require a monkey-patch (Feast ≤ 0.63 bug with dill+typeguard) | Work out of the box | -| Istio workaround | Required (3-part) | Not needed | | Setup complexity | Higher | Low | -## When to remove this workaround - -This mode requires `feast-istio-workaround.yaml` because the Feast Operator -creates the registry Service with `name: http` and no `appProtocol`, causing -Istio to misclassify gRPC traffic as HTTP/1.1. 
- -Once [feast-dev/feast#6367](https://github.com/feast-dev/feast/pull/6367) is -merged and you upgrade the operator, the workaround becomes unnecessary: -- Remove `feast-istio-workaround.yaml` and its `kubectl apply` step -- Remove the `podAnnotations` block from `feast-cr.yaml` -- Remove the `PandasTransformation.from_proto` monkey-patch from the notebook - -At that point, remote mode becomes the clear default and local mode can be -retired. - ## Setup Follow the top-level README through the Redis and `feast-redis-config` steps, @@ -48,10 +31,6 @@ then: # 1. Deploy the FeatureStore CR kubectl apply -f registry/remote/feast-cr.yaml kubectl get featurestore -n -w # wait until Ready - -# 2. Apply the Istio workaround -sed 's//my-store/g; s///g' \ - registry/remote/feast-istio-workaround.yaml | kubectl apply -f - ``` Then open the notebook and select **Remote** when prompted. @@ -60,6 +39,6 @@ Then open the notebook and select **Remote** when prompted. | File | Purpose | |------|---------| -| `feast-cr.yaml` | FeatureStore CR with `server: {}` and Istio pod annotation | +| `feast-cr.yaml` | FeatureStore CR with `server: {}` to enable the gRPC registry | | `feature_store.yaml` | Feast SDK config template (notebook writes this from the operator ConfigMap) | -| `feast-istio-workaround.yaml` | Alt-Service + DestinationRule to fix Istio gRPC misclassification | +| `network-policies.yaml` | CNI-layer NetworkPolicies for registry and Redis isolation | diff --git a/feast/registry/remote/feast-cr.yaml b/feast/registry/remote/feast-cr.yaml index fb23f62..07e0c6a 100644 --- a/feast/registry/remote/feast-cr.yaml +++ b/feast/registry/remote/feast-cr.yaml @@ -10,9 +10,6 @@ # remotely. Definitions persist on the operator-managed PVC across restarts # and are shared with every client in the namespace. # -# Requires the Istio workaround in feast-istio-workaround.yaml — see the -# Known Limitations section in registry/remote/README.md. 
-# # Prerequisites: # - feast-redis-config secret must exist in your namespace (see top-level README) apiVersion: feast.dev/v1 @@ -26,14 +23,6 @@ spec: runFeastApplyOnInit: false securityContext: runAsUser: 0 - # The operator-generated registry Service has port name "http" and no - # appProtocol. This misleads the SERVER-side envoy into classifying port - # 6570 as HTTP/1.1. Bypassing server-side envoy interception entirely - # (excludeInboundPorts) is the only workaround until the operator sets - # appProtocol: grpc on its own Service. See feast-istio-workaround.yaml - # for the client-side alt-Service + DestinationRule. - podAnnotations: - traffic.sidecar.istio.io/excludeInboundPorts: "6570" registry: local: # Expose the registry as a gRPC server so notebooks/clients can read diff --git a/feast/registry/remote/feast-istio-workaround.yaml b/feast/registry/remote/feast-istio-workaround.yaml deleted file mode 100644 index 6812897..0000000 --- a/feast/registry/remote/feast-istio-workaround.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Istio workaround for the Feast registry gRPC service. -# -# Root cause -# ---------- -# The operator creates the registry Service with port name "http" and no -# appProtocol. This single omission misleads BOTH envoy sidecars: -# -# CLIENT-side envoy: sees "http" → classifies traffic as HTTP/1.1, -# downgrading gRPC (which requires HTTP/2). -# -# SERVER-side envoy: builds its inbound listener for port 6570 from the -# same Service → classifies port 6570 as HTTP/1.1 → receives gRPC -# (HTTP/2) → sends RST ("protocol error"). -# -# Three-part workaround (until operator sets appProtocol: grpc upstream) -# ----------------------------------------------------------------------- -# 1. feast-cr.yaml: excludeInboundPorts: "6570" -# Bypass the server-side envoy entirely for port 6570. Since the server -# envoy's inbound listener is misconfigured (HTTP/1.1), we skip it so -# gRPC reaches the feast container directly. -# -# 2. 
This file — alt-Service with appProtocol: grpc: -# Fix the CLIENT-side envoy's protocol detection so it sends HTTP/2. -# -# 3. This file — DestinationRule tls: DISABLE: -# Since the server-side envoy is bypassed (no mTLS termination), the -# client must not attempt mTLS. Plain HTTP/2 goes directly to the -# feast container. -# -# When the operator sets appProtocol: grpc on its own Service, all three -# workarounds become unnecessary: both envoys detect the protocol correctly -# and normal mTLS flows end-to-end. -# -# Usage -# ----- -# Replace with your FeatureStore CR name and with your -# Kubeflow profile namespace, then apply: -# -# sed 's//my-store/g; s//my-namespace/g' \ -# feast-istio-workaround.yaml | kubectl apply -f - -# -# In feature_store.yaml, point the registry at the alt-Service: -# -# registry: -# registry_type: remote -# path: grpc://feast--registry-grpc..svc.cluster.local:80 -# -# --------------------------------------------------------------------------- -apiVersion: v1 -kind: Service -metadata: - name: feast--registry-grpc - namespace: -spec: - selector: - app: feast- # operator labels the feast-server pod with this - ports: - - name: grpc - port: 80 - targetPort: 6570 - appProtocol: grpc ---- -apiVersion: networking.istio.io/v1beta1 -kind: DestinationRule -metadata: - name: feast--registry-grpc - namespace: -spec: - host: feast--registry-grpc..svc.cluster.local - trafficPolicy: - tls: - mode: DISABLE diff --git a/feast/registry/remote/feature_store.yaml b/feast/registry/remote/feature_store.yaml index 8d17597..04b651b 100644 --- a/feast/registry/remote/feature_store.yaml +++ b/feast/registry/remote/feature_store.yaml @@ -12,7 +12,7 @@ online_store: connection_string: ":6379,password=" registry: registry_type: remote - path: grpc://feast-my-store-registry-grpc..svc.cluster.local:80 + path: grpc://feast-my-store-registry..svc.cluster.local:6570 auth: type: no_auth entity_key_serialization_version: 3 diff --git 
a/feast/registry/remote/network-policies.yaml b/feast/registry/remote/network-policies.yaml new file mode 100644 index 0000000..b740db3 --- /dev/null +++ b/feast/registry/remote/network-policies.yaml @@ -0,0 +1,57 @@ +# NetworkPolicies for Feast registry and online store isolation. +# +# These policies restrict registry and Redis access to pods within the same +# namespace, providing defense-in-depth alongside Istio AuthorizationPolicies. +# NetworkPolicies operate at the CNI layer and enforce isolation regardless of +# sidecar configuration. +# +# Usage +# ----- +# Replace with your FeatureStore CR name and with your +# Kubeflow profile namespace, then apply: +# +# sed 's//my-store/g; s//my-namespace/g' \ +# network-policies.yaml | kubectl apply -f - +# +# --------------------------------------------------------------------------- + +# Feast registry server: allow ingress on port 6570 only from pods in the +# same namespace. Pods in other namespaces — including other Feast instances +# — are blocked at the CNI layer. +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: feast--registry-ingress + namespace: +spec: + podSelector: + matchLabels: + app: feast- # label applied by the Feast operator to the server pod + policyTypes: + - Ingress + ingress: + - from: + - podSelector: {} # any pod in the same namespace + ports: + - port: 6570 + protocol: TCP +--- +# Redis online store: allow ingress on port 6379 only from pods in the +# same namespace. 
+apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: redis-feast-ingress + namespace: +spec: + podSelector: + matchLabels: + app: redis-feast # label applied by the OpsTree Redis operator (app: ) + policyTypes: + - Ingress + ingress: + - from: + - podSelector: {} # any pod in the same namespace + ports: + - port: 6379 + protocol: TCP From 28fee149fd84fd1e548e15ea424b70be03691052 Mon Sep 17 00:00:00 2001 From: Igor Kvachenok Date: Mon, 15 Jun 2026 16:32:45 +0200 Subject: [PATCH 20/22] feast: drop namespace placeholders, document network policies - Remove namespace fields from CRs and kubectl commands (kubectl uses the current context namespace when run from a notebook) - Replace placeholder in network-policies.yaml with concrete my-store name; drop the sed usage block - Add Network policies section to remote README explaining what each policy protects and add kubectl apply step to setup instructions --- feast/README.md | 10 ++++------ feast/redis-cr.yaml | 1 - feast/registry/local/feast-cr.yaml | 1 - feast/registry/remote/README.md | 18 +++++++++++++++++- feast/registry/remote/feast-cr.yaml | 1 - feast/registry/remote/network-policies.yaml | 14 ++------------ 6 files changed, 23 insertions(+), 22 deletions(-) diff --git a/feast/README.md b/feast/README.md index 8d78af8..1407610 100644 --- a/feast/README.md +++ b/feast/README.md @@ -18,7 +18,6 @@ training a return-risk model, and serving predictions in real time. 
```bash kubectl create secret generic redis-feast \ - -n \ --from-literal=password=$(openssl rand -base64 24 | tr -d '/') kubectl apply -f redis-cr.yaml # edit namespace first @@ -29,7 +28,7 @@ kubectl get redis -n -w ```bash NAMESPACE= -PASSWORD=$(kubectl get secret redis-feast -n $NAMESPACE \ +PASSWORD=$(kubectl get secret redis-feast \ -o jsonpath='{.data.password}' | base64 -d) cat > /tmp/redis-config.yaml << EOF @@ -37,7 +36,6 @@ connection_string: "redis-feast.${NAMESPACE}.svc.cluster.local:6379,password=${P EOF kubectl create secret generic feast-redis-config \ - -n $NAMESPACE \ --from-file=redis=/tmp/redis-config.yaml rm /tmp/redis-config.yaml @@ -101,11 +99,11 @@ Feast has three stores: ┌──────────────────────────────────────┐ │ Your Namespace │ │ │ - │ Redis CR (redis-feast) │ + │ Redis CR (redis-feast) │ │ │ store.apply() ───▶ Registry │ - (notebook) │ local: sqlite:////tmp/registry.db │ - │ remote: gRPC → operator PVC │ + (notebook) │ local: sqlite:////tmp/registry.db │ + │ remote: gRPC → operator PVC │ │ │ materialize ──────▶ Redis online store │ │ │ diff --git a/feast/redis-cr.yaml b/feast/redis-cr.yaml index df50c01..fd5846c 100644 --- a/feast/redis-cr.yaml +++ b/feast/redis-cr.yaml @@ -9,7 +9,6 @@ apiVersion: redis.redis.opstreelabs.in/v1beta2 kind: Redis metadata: name: redis-feast - namespace: # <-- change this spec: kubernetesConfig: image: quay.io/opstree/redis:v7.0.15 diff --git a/feast/registry/local/feast-cr.yaml b/feast/registry/local/feast-cr.yaml index 02519e6..74b109e 100644 --- a/feast/registry/local/feast-cr.yaml +++ b/feast/registry/local/feast-cr.yaml @@ -14,7 +14,6 @@ apiVersion: feast.dev/v1 kind: FeatureStore metadata: name: my-store - namespace: # <-- change this spec: feastProject: retail_features services: diff --git a/feast/registry/remote/README.md b/feast/registry/remote/README.md index e6f0c6f..30ab35f 100644 --- a/feast/registry/remote/README.md +++ b/feast/registry/remote/README.md @@ -22,6 +22,19 @@ the namespace. 
| Shared across clients | Yes | No | | Setup complexity | Higher | Low | +## Network policies + +`network-policies.yaml` restricts access to the registry and Redis to pods +within the same namespace. This is defense-in-depth alongside the +namespace-isolation AuthorizationPolicy that the Kubeflow profile controller +creates — NetworkPolicies are enforced at the CNI layer independently of the +Istio mesh. + +| Policy | Protects | Port | +|--------|----------|------| +| `feast-my-store-registry-ingress` | Feast registry gRPC server | 6570 | +| `redis-feast-ingress` | Redis online store | 6379 | + ## Setup Follow the top-level README through the Redis and `feast-redis-config` steps, @@ -30,7 +43,10 @@ then: ```bash # 1. Deploy the FeatureStore CR kubectl apply -f registry/remote/feast-cr.yaml -kubectl get featurestore -n -w # wait until Ready +kubectl get featurestore -w # wait until Ready + +# 2. Apply the network policies +kubectl apply -f registry/remote/network-policies.yaml ``` Then open the notebook and select **Remote** when prompted. diff --git a/feast/registry/remote/feast-cr.yaml b/feast/registry/remote/feast-cr.yaml index 07e0c6a..4074d6e 100644 --- a/feast/registry/remote/feast-cr.yaml +++ b/feast/registry/remote/feast-cr.yaml @@ -16,7 +16,6 @@ apiVersion: feast.dev/v1 kind: FeatureStore metadata: name: my-store - namespace: # <-- change this spec: feastProject: retail_features services: diff --git a/feast/registry/remote/network-policies.yaml b/feast/registry/remote/network-policies.yaml index b740db3..93a1daf 100644 --- a/feast/registry/remote/network-policies.yaml +++ b/feast/registry/remote/network-policies.yaml @@ -5,14 +5,6 @@ # NetworkPolicies operate at the CNI layer and enforce isolation regardless of # sidecar configuration. 
# -# Usage -# ----- -# Replace with your FeatureStore CR name and with your -# Kubeflow profile namespace, then apply: -# -# sed 's//my-store/g; s//my-namespace/g' \ -# network-policies.yaml | kubectl apply -f - -# # --------------------------------------------------------------------------- # Feast registry server: allow ingress on port 6570 only from pods in the @@ -21,12 +13,11 @@ apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: - name: feast--registry-ingress - namespace: + name: feast-my-store-registry-ingress spec: podSelector: matchLabels: - app: feast- # label applied by the Feast operator to the server pod + app: feast-my-store # label applied by the Feast operator to the server pod policyTypes: - Ingress ingress: @@ -42,7 +33,6 @@ apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: name: redis-feast-ingress - namespace: spec: podSelector: matchLabels: From 02c613f1582280762203c0aeb3fcaf863023615c Mon Sep 17 00:00:00 2001 From: Igor Kvachenok Date: Mon, 15 Jun 2026 16:40:03 +0200 Subject: [PATCH 21/22] feast: move infrastructure setup into notebook, no terminal needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an Infrastructure setup cell that handles everything from a fresh namespace: Redis password secret, Redis CR deployment (with readiness wait), feast-redis-config secret, FeatureStore CR (with readiness wait), and NetworkPolicies for remote mode. 
- NAMESPACE is read from the pod service account — no user input needed - Each step is idempotent: skips resources that already exist - Utility helpers (kubectl_json, get_namespace, secret_exists) are defined once in the setup cell and reused by the configure cell - README simplified to 'open the notebook and run all cells' --- feast/README.md | 60 ++----------- feast/feast_example.ipynb | 173 +++++++++++++++++++++++++++++++++----- 2 files changed, 162 insertions(+), 71 deletions(-) diff --git a/feast/README.md b/feast/README.md index 1407610..8fccd41 100644 --- a/feast/README.md +++ b/feast/README.md @@ -7,72 +7,30 @@ feature management in ML workflows. return their next order. The notebook walks through defining customer features, training a return-risk model, and serving predictions in real time. -## Prerequisites - -- Feast must be enabled on your cluster (ask your admin) -- You have `kubectl` access to your Kubeflow profile namespace - ## Quick Start -### 1. Deploy a Redis instance - -```bash -kubectl create secret generic redis-feast \ - --from-literal=password=$(openssl rand -base64 24 | tr -d '/') - -kubectl apply -f redis-cr.yaml # edit namespace first -kubectl get redis -n -w -``` - -### 2. Create the Feast Redis secret - -```bash -NAMESPACE= -PASSWORD=$(kubectl get secret redis-feast \ - -o jsonpath='{.data.password}' | base64 -d) - -cat > /tmp/redis-config.yaml << EOF -connection_string: "redis-feast.${NAMESPACE}.svc.cluster.local:6379,password=${PASSWORD}" -EOF +1. Feast must be enabled on your cluster (ask your admin) +2. Clone this repository to your notebook server +3. Open `feast_example.ipynb` from the `feast/` directory and run all cells -kubectl create secret generic feast-redis-config \ - --from-file=redis=/tmp/redis-config.yaml +The notebook's **Infrastructure setup** cell handles everything automatically: +Redis, secrets, FeatureStore CR, and (for remote mode) NetworkPolicies. -rm /tmp/redis-config.yaml -``` - -### 3. 
Choose a registry mode and deploy the FeatureStore +## Registry modes -There are two registry modes. **Pick one:** +There are two registry modes. Select one in the notebook when prompted: | | Local | Remote | |---|---|---| -| **Registry** | SQLite SQL on `/tmp` (ephemeral) or PVC (persistent) | gRPC server on operator PVC (persistent, shared) | +| **Registry** | SQLite SQL on `/tmp` (ephemeral) | gRPC server on operator PVC (persistent, shared) | | **Good for** | Single user, quick iteration | Teams sharing definitions across clients | -**Local:** -```bash -kubectl apply -f registry/local/feast-cr.yaml # edit namespace first -kubectl get featurestore -n -w -``` - -**Remote:** -```bash -kubectl apply -f registry/remote/feast-cr.yaml # edit namespace first -kubectl get featurestore -n -w -``` - -### 4. Run the notebook - -Open `feast_example.ipynb`. The first cell will ask which registry mode you -chose — select it there and run all cells. - ## Files ``` feast/ feast_example.ipynb End-to-end notebook (works with both modes) - redis-cr.yaml Deploys a Redis instance (OpsTree operator) + redis-cr.yaml Redis instance CR (OpsTree operator) registry/ local/ feast-cr.yaml FeatureStore CR — local SQLite SQL registry diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index e084efd..b5d0576 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -22,25 +22,15 @@ "6. **Materialize** — push latest values to Redis for online serving\n", "7. **Serve** — predict return risk for incoming orders in real time\n", "\n", - "Everything happens inline in this notebook — no external Python files or CLI\n", - "commands needed.\n", + "Everything happens inline in this notebook — no terminal needed.\n", "\n", "### Prerequisites\n", "\n", - "**Before running this notebook**, follow the setup steps in\n", - "[`README.md`](README.md) first. 
You need to create:\n", + "- Feast must be enabled on your cluster (ask your admin)\n", + "- Open this notebook from the `feast/` directory of the repository\n", "\n", - "1. The **Redis password secret** (`redis-feast`)\n", - "2. The **Redis instance** (`redis-cr.yaml`) — wait until the pod is Running\n", - "3. The **Feast Redis secret** (`feast-redis-config`)\n", - "4. The **FeatureStore CR** (`feast-cr.yaml`) — wait until Ready\n", - "\n", - "If any of these are missing, the notebook will fail at the \"Configure the\n", - "Feast client\" step.\n", - "\n", - "> **Note:** This setup uses a SQLite registry on `/tmp` which does not\n", - "> survive pod restarts. For a production-ready deployment, see the\n", - "> [Production Setup](#Production-Setup) section at the end.\n" + "Run all cells in order. The **Infrastructure setup** cell deploys Redis,\n", + "creates secrets, and deploys the FeatureStore CR automatically.\n" ] }, { @@ -105,21 +95,46 @@ "display(registry_widget)\n" ] }, + { + "cell_type": "markdown", + "id": "infra_setup_header", + "metadata": {}, + "source": [ + "### Infrastructure setup\n", + "\n", + "The cell below deploys everything Feast needs and is safe to re-run — each\n", + "step checks whether the resource already exists and skips it if so.\n", + "\n", + "| Step | What it creates |\n", + "|------|-----------------|\n", + "| 1 | `redis-feast` secret — a random Redis password |\n", + "| 2 | Redis instance (`redis-cr.yaml`) — waits until the pod is Ready |\n", + "| 3 | `feast-redis-config` secret — connection string for the Feast operator |\n", + "| 4 | FeatureStore CR (`registry//feast-cr.yaml`) — waits until Ready |\n", + "| 5 | NetworkPolicies (remote mode only) — restricts registry and Redis to this namespace |\n" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "a8804025", + "id": "infra_setup", "metadata": {}, "outputs": [], "source": [ "import base64\n", "import json\n", + "import os\n", + "import secrets as _secrets\n", 
"import subprocess\n", + "import tempfile\n", + "import time\n", "\n", "import yaml\n", "\n", - "REGISTRY_MODE = registry_widget.value # 'local' or 'remote'\n", "\n", + "# ---------------------------------------------------------------------------\n", + "# Utility helpers used throughout the notebook.\n", + "# ---------------------------------------------------------------------------\n", "\n", "def kubectl_json(*args):\n", " return json.loads(subprocess.check_output([\"kubectl\", *args, \"-o\", \"json\"]))\n", @@ -136,11 +151,130 @@ " ).decode().strip()\n", "\n", "\n", - "# Find the FeatureStore CR in this namespace (needed by both modes).\n", + "def secret_exists(name):\n", + " return subprocess.run([\"kubectl\", \"get\", \"secret\", name], capture_output=True).returncode == 0\n", + "\n", + "\n", + "# Verify we are in the right directory — relative apply paths depend on this.\n", + "if not os.path.exists(\"redis-cr.yaml\"):\n", + " raise RuntimeError(\n", + " \"redis-cr.yaml not found. Open this notebook from the feast/ directory.\"\n", + " )\n", + "\n", + "NAMESPACE = get_namespace()\n", + "REGISTRY_MODE = registry_widget.value\n", + "print(f\"Namespace: {NAMESPACE}\")\n", + "print(f\"Registry mode: {REGISTRY_MODE}\")\n", + "print()\n", + "\n", + "# ---------------------------------------------------------------------------\n", + "# 1. 
Redis password secret\n", + "# ---------------------------------------------------------------------------\n", + "if not secret_exists(\"redis-feast\"):\n", + " password = _secrets.token_urlsafe(18)\n", + " subprocess.check_call([\n", + " \"kubectl\", \"create\", \"secret\", \"generic\", \"redis-feast\",\n", + " f\"--from-literal=password={password}\",\n", + " ])\n", + " print(\"Created redis-feast secret\")\n", + "else:\n", + " _pw = kubectl_json(\"get\", \"secret\", \"redis-feast\")\n", + " password = base64.b64decode(_pw[\"data\"][\"password\"]).decode()\n", + " print(\"redis-feast: already exists\")\n", + "\n", + "# ---------------------------------------------------------------------------\n", + "# 2. Redis instance\n", + "# ---------------------------------------------------------------------------\n", + "subprocess.check_call([\"kubectl\", \"apply\", \"-f\", \"redis-cr.yaml\"])\n", + "print(\"Waiting for Redis pod to be Ready\", end=\"\", flush=True)\n", + "for _ in range(60):\n", + " _r = subprocess.run(\n", + " [\"kubectl\", \"get\", \"pods\", \"-l\", \"app=redis-feast\",\n", + " \"-o\", \"jsonpath={.items[0].status.conditions[?(@.type=='Ready')].status}\"],\n", + " capture_output=True, text=True,\n", + " )\n", + " if _r.stdout.strip() == \"True\":\n", + " break\n", + " time.sleep(5)\n", + " print(\".\", end=\"\", flush=True)\n", + "else:\n", + " raise RuntimeError(\"Redis pod did not become Ready within 5 minutes\")\n", + "print(\" done\")\n", + "\n", + "# ---------------------------------------------------------------------------\n", + "# 3. 
Feast Redis connection secret\n", + "#\n", + "# Key 'redis' holds a YAML snippet — the Feast operator reads it as:\n", + "# yaml.safe_load(secret['redis'])['connection_string']\n", + "# ---------------------------------------------------------------------------\n", + "if not secret_exists(\"feast-redis-config\"):\n", + " _conn = f\"redis-feast.{NAMESPACE}.svc.cluster.local:6379,password={password}\"\n", + " with tempfile.NamedTemporaryFile(mode=\"w\", suffix=\".yaml\", delete=False) as _f:\n", + " _f.write(f\"connection_string: '{_conn}'\\n\")\n", + " _tmp = _f.name\n", + " subprocess.check_call([\n", + " \"kubectl\", \"create\", \"secret\", \"generic\", \"feast-redis-config\",\n", + " f\"--from-file=redis={_tmp}\",\n", + " ])\n", + " os.unlink(_tmp)\n", + " print(\"Created feast-redis-config secret\")\n", + "else:\n", + " print(\"feast-redis-config: already exists\")\n", + "\n", + "# ---------------------------------------------------------------------------\n", + "# 4. FeatureStore CR\n", + "# ---------------------------------------------------------------------------\n", + "_cr = f\"registry/{REGISTRY_MODE}/feast-cr.yaml\"\n", + "subprocess.check_call([\"kubectl\", \"apply\", \"-f\", _cr])\n", + "print(\"Waiting for FeatureStore to be Ready\", end=\"\", flush=True)\n", + "for _ in range(60):\n", + " _r = subprocess.run(\n", + " [\"kubectl\", \"get\", \"featurestore\", \"my-store\",\n", + " \"-o\", \"jsonpath={.status.clientConfigMap}\"],\n", + " capture_output=True, text=True,\n", + " )\n", + " if _r.stdout.strip():\n", + " break\n", + " time.sleep(5)\n", + " print(\".\", end=\"\", flush=True)\n", + "else:\n", + " raise RuntimeError(\"FeatureStore did not become Ready within 5 minutes\")\n", + "print(\" done\")\n", + "\n", + "# ---------------------------------------------------------------------------\n", + "# 5. 
Network policies (remote mode only)\n", + "# ---------------------------------------------------------------------------\n", + "if REGISTRY_MODE == \"remote\":\n", + " subprocess.check_call([\"kubectl\", \"apply\", \"-f\", \"registry/remote/network-policies.yaml\"])\n", + " print(\"Applied network policies\")\n", + "\n", + "print(\"\\nInfrastructure ready.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "configure_client_header", + "metadata": {}, + "source": [ + "### Configure the Feast client\n", + "\n", + "Reads the FeatureStore CR and the Redis secret, then writes `feature_store.yaml`\n", + "— the SDK config that tells Feast where the registry, online store, and offline\n", + "store live. Re-run this cell if you restart the kernel without re-running setup.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8804025", + "metadata": {}, + "outputs": [], + "source": [ + "# Find the FeatureStore CR deployed by the setup cell.\n", "fs_list = kubectl_json(\"get\", \"featurestore\")[\"items\"]\n", "if not fs_list:\n", " raise RuntimeError(\n", - " \"No FeatureStore CR found — apply the feast-cr.yaml for your chosen mode first.\"\n", + " \"No FeatureStore CR found — run the infrastructure setup cell first.\"\n", " )\n", "fs = fs_list[0]\n", "fs_name = fs[\"metadata\"][\"name\"]\n", @@ -188,7 +322,6 @@ "with open(\"feature_store.yaml\", \"w\") as f:\n", " yaml.safe_dump(config, f, sort_keys=False)\n", "\n", - "NAMESPACE = get_namespace()\n", "FEAST_PROJECT = config[\"project\"]\n", "\n", "print(f\"Registry mode: {REGISTRY_MODE}\")\n", From e5bcc852241c5e55fdb394497ad47b3d849de7b3 Mon Sep 17 00:00:00 2001 From: Igor Kvachenok Date: Mon, 15 Jun 2026 17:33:46 +0200 Subject: [PATCH 22/22] =?UTF-8?q?feast:=20fix=20NetworkPolicies=20?= =?UTF-8?q?=E2=80=94=20correct=20label=20and=20explicit=20namespaceSelecto?= =?UTF-8?q?r?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs found via live isolation 
testing: 1. Registry policy used wrong pod label (app: feast-my-store) — the Feast operator actually labels the server pod feast.dev/name: my-store. The policy was selecting zero pods and enforcing nothing. 2. podSelector: {} without a namespaceSelector does not reliably restrict to the same namespace on this cluster (Calico with connect-time BPF load balancing). Added explicit namespaceSelector with kubernetes.io/metadata.name: <NAMESPACE> to both policies. The <NAMESPACE> placeholder is substituted at apply time by the notebook, which reads the namespace from the pod service account token. The apply step pipes the rendered YAML to kubectl via stdin. Tested: cross-namespace connections (prokube-demo-profile -> developer1) are blocked (Connection reset), same-namespace connections work (Redis returns -NOAUTH, registry returns HTTP 400). --- feast/feast_example.ipynb | 14 ++++++++++++- feast/registry/remote/network-policies.yaml | 23 ++++++++++++++++----- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/feast/feast_example.ipynb b/feast/feast_example.ipynb index b5d0576..dbac853 100644 --- a/feast/feast_example.ipynb +++ b/feast/feast_example.ipynb @@ -243,9 +243,21 @@ "\n", "# ---------------------------------------------------------------------------\n", "# 5. Network policies (remote mode only)\n", + "#\n", + "# The YAML contains a <NAMESPACE> placeholder — we fill it in at apply time\n", + "# so the namespaceSelector targets this specific namespace.
This is required\n", + "# for Calico eBPF mode, which does not reliably enforce same-namespace\n", + "# restriction from podSelector: {} alone.\n", "# ---------------------------------------------------------------------------\n", "if REGISTRY_MODE == \"remote\":\n", - " subprocess.check_call([\"kubectl\", \"apply\", \"-f\", \"registry/remote/network-policies.yaml\"])\n", + " with open(\"registry/remote/network-policies.yaml\") as _f:\n", + " _np_yaml = _f.read().replace(\"<NAMESPACE>\", NAMESPACE)\n", + " _proc = subprocess.run(\n", + " [\"kubectl\", \"apply\", \"-f\", \"-\"],\n", + " input=_np_yaml.encode(), capture_output=True,\n", + " )\n", + " if _proc.returncode != 0:\n", + " raise RuntimeError(_proc.stderr.decode())\n", " print(\"Applied network policies\")\n", "\n", "print(\"\\nInfrastructure ready.\")\n" diff --git a/feast/registry/remote/network-policies.yaml b/feast/registry/remote/network-policies.yaml index 93a1daf..6c90076 100644 --- a/feast/registry/remote/network-policies.yaml +++ b/feast/registry/remote/network-policies.yaml @@ -5,11 +5,18 @@ # NetworkPolicies operate at the CNI layer and enforce isolation regardless of # sidecar configuration. # +# Note: the namespaceSelector inside each ingress rule is intentional. +# Calico eBPF mode does not reliably enforce same-namespace restriction when +# only podSelector: {} is used in the from clause — an explicit +# namespaceSelector is required to lock access to a specific namespace. +# +# The <NAMESPACE> placeholder is filled in by the notebook at apply time +# (it reads the namespace from the pod's service account token). +# # --------------------------------------------------------------------------- # Feast registry server: allow ingress on port 6570 only from pods in the -# same namespace. Pods in other namespaces — including other Feast instances -# — are blocked at the CNI layer. +# same namespace. Pods in other namespaces are blocked at the CNI layer.
apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: @@ -17,12 +24,15 @@ metadata: spec: podSelector: matchLabels: - app: feast-my-store # label applied by the Feast operator to the server pod + feast.dev/name: my-store # label applied by the Feast operator to the server pod policyTypes: - Ingress ingress: - from: - - podSelector: {} # any pod in the same namespace + - podSelector: {} + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: <NAMESPACE> ports: - port: 6570 protocol: TCP @@ -41,7 +51,10 @@ spec: - Ingress ingress: - from: - - podSelector: {} # any pod in the same namespace + - podSelector: {} + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: <NAMESPACE> ports: - port: 6379 protocol: TCP