Product analytics tutorial

This tutorial builds a product analytics model with three entities — users, events, and conversions — and answers the questions a growth or product team asks daily: how engaged is each user, did they hit activation milestones, which session was their most active, how does acquisition channel drive revenue, and should we nudge them toward conversion. The final section runs an A/B test with MACRO() and HASH01(), parameterizing the nudge threshold without duplicating business logic.

The data model

The dataset models a SaaS product with four tables: a user dimension, an event stream, a conversion fact table, and a pre-aggregated OBT that stores each user's event IDs as an array.

  • prodanalytics.dim_users — one row per user. user_id is the primary key. plan is 'free' or 'pro', and acquisition_channel is how they found the product ('organic', 'paid_ad', or 'referral').
  • prodanalytics.fct_events — one row per user action. event_id is the primary key. event_user_id is a foreign key to users. event_type captures the action ('signup', 'view_dashboard', 'create_project', 'invite_teammate', 'upgrade_prompt', 'subscribe'). session_id groups events that occurred in the same browsing session.
  • prodanalytics.fct_conversions — one row per paid conversion. conversion_id is the primary key. conversion_user_id is a foreign key to users. revenue is the first payment amount.
  • prodanalytics.agg_users_obt — pre-aggregated: each user's last_event_id (most recent by timestamp) and events (array of all event IDs, ordered chronologically).

FeatureQL
/* SQL */
--
CREATE SCHEMA IF NOT EXISTS prodanalytics;
DROP TABLE IF EXISTS prodanalytics.dim_users;
DROP TABLE IF EXISTS prodanalytics.fct_events;
DROP TABLE IF EXISTS prodanalytics.fct_conversions;
DROP TABLE IF EXISTS prodanalytics.agg_users_obt;
--
CREATE TABLE prodanalytics.dim_users (
    user_id BIGINT,
    signup_date DATE,
    plan VARCHAR,
    acquisition_channel VARCHAR
);
INSERT INTO prodanalytics.dim_users VALUES
    (100, DATE '2024-09-01', 'free', 'organic'),
    (101, DATE '2024-09-15', 'free', 'paid_ad'),
    (102, DATE '2024-10-01', 'pro', 'organic'),
    (103, DATE '2024-10-10', 'free', 'referral'),
    (104, DATE '2024-11-01', 'pro', 'paid_ad'),
    (105, DATE '2024-11-15', 'free', 'organic');
--
CREATE TABLE prodanalytics.fct_events (
    event_id BIGINT,
    event_user_id BIGINT,
    event_type VARCHAR,
    event_ts TIMESTAMP,
    page_path VARCHAR,
    session_id BIGINT
);
INSERT INTO prodanalytics.fct_events VALUES
    -- User 100: signup -> view_dashboard -> create_project -> view_dashboard -> invite_teammate -> upgrade_prompt -> subscribe (conversion)
    (1,  100, 'signup',          TIMESTAMP '2024-09-01 10:00:00', '/welcome',     1000),
    (2,  100, 'view_dashboard',  TIMESTAMP '2024-09-01 10:05:00', '/dashboard',   1000),
    (3,  100, 'create_project',  TIMESTAMP '2024-09-01 10:15:00', '/projects/new',1000),
    (4,  100, 'view_dashboard',  TIMESTAMP '2024-09-03 09:00:00', '/dashboard',   1001),
    (5,  100, 'invite_teammate', TIMESTAMP '2024-09-03 09:10:00', '/team/invite', 1001),
    (6,  100, 'upgrade_prompt',  TIMESTAMP '2024-09-05 14:00:00', '/pricing',     1002),
    (7,  100, 'subscribe',       TIMESTAMP '2024-09-05 14:05:00', '/checkout',    1002),
    -- User 101: signup -> view_dashboard -> view_dashboard -> churns (no conversion)
    (8,  101, 'signup',          TIMESTAMP '2024-09-15 11:00:00', '/welcome',     1003),
    (9,  101, 'view_dashboard',  TIMESTAMP '2024-09-15 11:10:00', '/dashboard',   1003),
    (10, 101, 'view_dashboard',  TIMESTAMP '2024-09-20 08:00:00', '/dashboard',   1004),
    -- User 102: signup -> create_project -> create_project -> invite_teammate -> subscribe
    (11, 102, 'signup',          TIMESTAMP '2024-10-01 09:00:00', '/welcome',     1005),
    (12, 102, 'create_project',  TIMESTAMP '2024-10-01 09:30:00', '/projects/new',1005),
    (13, 102, 'create_project',  TIMESTAMP '2024-10-02 10:00:00', '/projects/new',1006),
    (14, 102, 'invite_teammate', TIMESTAMP '2024-10-02 10:15:00', '/team/invite', 1006),
    (15, 102, 'subscribe',       TIMESTAMP '2024-10-03 11:00:00', '/checkout',    1007),
    -- User 103: signup -> view_dashboard -> create_project -> upgrade_prompt (no conversion yet)
    (16, 103, 'signup',          TIMESTAMP '2024-10-10 13:00:00', '/welcome',     1008),
    (17, 103, 'view_dashboard',  TIMESTAMP '2024-10-10 13:10:00', '/dashboard',   1008),
    (18, 103, 'create_project',  TIMESTAMP '2024-10-12 15:00:00', '/projects/new',1009),
    (19, 103, 'upgrade_prompt',  TIMESTAMP '2024-10-15 10:00:00', '/pricing',     1010),
    -- User 104: signup -> create_project -> invite_teammate -> invite_teammate -> subscribe (fast converter)
    (20, 104, 'signup',          TIMESTAMP '2024-11-01 08:00:00', '/welcome',     1011),
    (21, 104, 'create_project',  TIMESTAMP '2024-11-01 08:20:00', '/projects/new',1011),
    (22, 104, 'invite_teammate', TIMESTAMP '2024-11-01 08:30:00', '/team/invite', 1011),
    (23, 104, 'invite_teammate', TIMESTAMP '2024-11-01 08:35:00', '/team/invite', 1011),
    (24, 104, 'subscribe',       TIMESTAMP '2024-11-01 09:00:00', '/checkout',    1011),
    -- User 105: signup -> view_dashboard (minimal engagement, no conversion)
    (25, 105, 'signup',          TIMESTAMP '2024-11-15 16:00:00', '/welcome',     1012),
    (26, 105, 'view_dashboard',  TIMESTAMP '2024-11-15 16:05:00', '/dashboard',   1012);
--
CREATE TABLE prodanalytics.fct_conversions (
    conversion_id BIGINT,
    conversion_user_id BIGINT,
    conversion_type VARCHAR,
    conversion_ts TIMESTAMP,
    revenue DECIMAL(10,2)
);
INSERT INTO prodanalytics.fct_conversions VALUES
    (400, 100, 'trial_to_paid', TIMESTAMP '2024-09-05 14:05:00', 29.00),
    (401, 102, 'direct_paid',   TIMESTAMP '2024-10-03 11:00:00', 49.00),
    (402, 104, 'direct_paid',   TIMESTAMP '2024-11-01 09:00:00', 49.00);
--
CREATE TABLE prodanalytics.agg_users_obt (
    user_id BIGINT,
    last_event_id BIGINT,
    events BIGINT[]
);
INSERT INTO prodanalytics.agg_users_obt
SELECT
    event_user_id AS user_id,
    MAX_BY(event_id, event_ts) AS last_event_id,
    ARRAY_AGG(event_id ORDER BY event_ts)
FROM prodanalytics.fct_events
GROUP BY event_user_id;
--
SELECT CAST(COUNT(*) AS INTEGER) AS Count FROM prodanalytics.agg_users_obt;
Result
Count BIGINT
6

Define entities and map external data

The model has three entities: USERS (IDs in the 100s), EVENTS (single digits through 26), and CONVERSIONS (400s). Entity annotations on foreign key columns (event_user_id BIGINT#USERS, conversion_user_id BIGINT#USERS) tell FeatureQL how to traverse relationships. The OBT's events ARRAY(BIGINT#EVENTS) declares an array of event entity references, enabling the EXTEND(ZIP(...)) pattern used in later queries.

FeatureQL
CREATE OR REPLACE FEATURES IN FM.PRODANALYTICS AS
SELECT
    USERS := ENTITY(),
    EVENTS := ENTITY(),
    CONVERSIONS := ENTITY(),
    USER_ID := INPUT(BIGINT#USERS),
    EVENT_ID := INPUT(BIGINT#EVENTS),
    CONVERSION_ID := INPUT(BIGINT#CONVERSIONS),
;
Result
feature_name VARCHARstatus VARCHARmessage VARCHAR
FM.PRODANALYTICS.USERSCREATEDFeature created as not exists
FM.PRODANALYTICS.EVENTSCREATEDFeature created as not exists
FM.PRODANALYTICS.CONVERSIONSCREATEDFeature created as not exists
FM.PRODANALYTICS.USER_IDCREATEDFeature created as not exists
FM.PRODANALYTICS.EVENT_IDCREATEDFeature created as not exists
FM.PRODANALYTICS.CONVERSION_IDCREATEDFeature created as not exists

FeatureQL
CREATE OR REPLACE FEATURES IN FM.PRODANALYTICS AS
SELECT
    TABLES.DIM_USERS := EXTERNAL_COLUMNS(
        user_id BIGINT#USERS BIND TO USER_ID,
        signup_date DATE,
        plan VARCHAR,
        acquisition_channel VARCHAR,
        FROM TABLE(prodanalytics.dim_users)
    ),
    TABLES.FCT_EVENTS := EXTERNAL_COLUMNS(
        event_id BIGINT#EVENTS BIND TO EVENT_ID,
        event_user_id BIGINT#USERS,
        event_type VARCHAR,
        event_ts TIMESTAMP,
        page_path VARCHAR,
        session_id BIGINT,
        FROM TABLE(prodanalytics.fct_events)
    ),
    TABLES.FCT_CONVERSIONS := EXTERNAL_COLUMNS(
        conversion_id BIGINT#CONVERSIONS BIND TO CONVERSION_ID,
        conversion_user_id BIGINT#USERS,
        conversion_type VARCHAR,
        conversion_ts TIMESTAMP,
        revenue DECIMAL(10,2),
        FROM TABLE(prodanalytics.fct_conversions)
    ),
    TABLES.AGG_USERS_OBT := EXTERNAL_COLUMNS(
        user_id BIGINT#USERS BIND TO USER_ID,
        last_event_id BIGINT#EVENTS,
        events ARRAY(BIGINT#EVENTS),
        FROM TABLE(prodanalytics.agg_users_obt)
    ),
;
Result
feature_name VARCHARstatus VARCHARmessage VARCHAR
FM.PRODANALYTICS.TABLES.DIM_USERSCREATEDFeature created as not exists
FM.PRODANALYTICS.TABLES.FCT_EVENTSCREATEDFeature created as not exists
FM.PRODANALYTICS.TABLES.FCT_CONVERSIONSCREATEDFeature created as not exists
FM.PRODANALYTICS.TABLES.AGG_USERS_OBTCREATEDFeature created as not exists

How engaged is each user?

The first question combines two aggregation patterns. NUM_EVENTS uses PK-to-FK aggregation to count events per user. CONVERSION_DATE does the same against the conversions table, taking the earliest conversion timestamp. DAYS_TO_CONVERT then computes the gap between signup and first conversion — a derived feature that mixes data from two different entity relationships in a single expression.

Users 100, 102, and 104 converted (in 4, 2, and 0 days respectively). The other three never converted, so DAYS_TO_CONVERT is NULL.

In SQL, this would require two separate CTEs (one for event count, one for first conversion date), then a three-way LEFT JOIN back to the user dimension. FeatureQL expresses each metric as an independent feature and lets the engine handle the joins.

FeatureQL
WITH
    PLAN := TABLES.DIM_USERS[plan],
    SIGNUP_DATE := TABLES.DIM_USERS[signup_date],
    NUM_EVENTS := USER_ID.RELATED(COUNT(1) GROUP BY TABLES.FCT_EVENTS[event_user_id]),
    CONVERSION_DATE := USER_ID.RELATED(
        MIN(TABLES.FCT_CONVERSIONS[conversion_ts]) GROUP BY TABLES.FCT_CONVERSIONS[conversion_user_id]
    ),
    HAS_CONVERTED := CONVERSION_DATE IS NOT NULL::TIMESTAMP,
    DAYS_TO_CONVERT := CASE
        WHEN HAS_CONVERTED THEN DATE_DIFF(CONVERSION_DATE, SIGNUP_DATE::TIMESTAMP, 'day')
        ELSE NULL(BIGINT)
    END,
SELECT
    USER_ID,
    PLAN,
    NUM_EVENTS,
    HAS_CONVERTED,
    DAYS_TO_CONVERT,
FROM FM.PRODANALYTICS
FOR CROSS
    USER_ID := BIND_VALUES(SEQUENCE(100, 105)),
    EVENT_ID := BIND_VALUES(SEQUENCE(1, 26)),
    CONVERSION_ID := BIND_VALUES(SEQUENCE(400, 402)),
;
Result
FM.PRODANALYTICS.USER_ID BIGINTPLAN VARCHARNUM_EVENTS BIGINTHAS_CONVERTED BOOLEANDAYS_TO_CONVERT VARCHAR
100free7TRUE4
101free3FALSENULL
102pro5TRUE2
103free4FALSENULL
104pro5TRUE0
105free2FALSENULL

What was each user's last action?

The OBT stores each user's last_event_id as a foreign key to the events entity. RELATED() follows that key to retrieve the event type and timestamp — a FK-to-PK lookup, the simplest relationship pattern.

This is useful for identifying stale users (last action was weeks ago) or users stuck at a particular step (last action was upgrade_prompt but never subscribe).

FeatureQL
WITH
    USER_PLAN := TABLES.DIM_USERS[plan],
    LAST_EVENT_ID := TABLES.AGG_USERS_OBT[last_event_id],
    LAST_EVENT_TYPE := LAST_EVENT_ID.RELATED(TABLES.FCT_EVENTS[event_type]),
    LAST_EVENT_TS := LAST_EVENT_ID.RELATED(TABLES.FCT_EVENTS[event_ts]),
SELECT
    USER_ID,
    USER_PLAN,
    LAST_EVENT_ID,
    LAST_EVENT_TYPE,
    LAST_EVENT_TS,
FROM FM.PRODANALYTICS
FOR CROSS
    USER_ID := BIND_VALUES(SEQUENCE(100, 105)),
    EVENT_ID := BIND_VALUES(SEQUENCE(1, 26)),
;
Result
FM.PRODANALYTICS.USER_ID BIGINTUSER_PLAN VARCHARLAST_EVENT_ID BIGINTLAST_EVENT_TYPE VARCHARLAST_EVENT_TS VARCHAR
100free7subscribe2024-09-05T14:05:00
101free10view_dashboard2024-09-20T08:00:00
102pro15subscribe2024-10-03T11:00:00
103free19upgrade_prompt2024-10-15T10:00:00
104pro24subscribe2024-11-01T09:00:00
105free26view_dashboard2024-11-15T16:05:00

Did each user hit activation milestones?

This is where array manipulation shows its strength. The activation funnel has three milestones: creating a project, inviting a teammate, and subscribing. Instead of writing three separate EXISTS subqueries with correlated predicates, FeatureQL enriches the user's event array once and then queries it three times.

EXTEND(ZIP(USER_EVENTS AS event_id) WITH EVENT_TYPE AS event_type ...) converts the scalar array of event IDs into an array of rows, then looks up each event's type. The result is ARRAY<ROW<event_id, event_type>> — a self-contained event log per user.

Each .TRANSFORM(SELECT COUNT(*) WHERE event_type = '...') runs a filtered aggregation inside that array. .UNWRAP_ONE() extracts the scalar count, and > 0 converts it to a boolean. Three milestone checks, one enrichment step, zero joins.

In SQL, each milestone would be a separate EXISTS (SELECT 1 FROM fct_events WHERE event_user_id = u.user_id AND event_type = '...') correlated subquery — or a single GROUP BY with conditional aggregation followed by a join. FeatureQL avoids both patterns.

FeatureQL
WITH
    CHANNEL := TABLES.DIM_USERS[acquisition_channel],
    USER_EVENTS := TABLES.AGG_USERS_OBT[events],
    EVENT_TYPE := TABLES.FCT_EVENTS[event_type],
    ENRICHED_EVENTS := EXTEND(
        ZIP(USER_EVENTS AS event_id)
        WITH EVENT_TYPE AS event_type
        VIA event_id BIND TO EVENT_ID
    ),
    DID_CREATE_PROJECT := COALESCE(
        ENRICHED_EVENTS
            .TRANSFORM(SELECT COUNT(*) WHERE event_type = 'create_project')
            .UNWRAP_ONE(),
        0
    ) > 0,
    DID_INVITE := COALESCE(
        ENRICHED_EVENTS
            .TRANSFORM(SELECT COUNT(*) WHERE event_type = 'invite_teammate')
            .UNWRAP_ONE(),
        0
    ) > 0,
    DID_SUBSCRIBE := COALESCE(
        ENRICHED_EVENTS
            .TRANSFORM(SELECT COUNT(*) WHERE event_type = 'subscribe')
            .UNWRAP_ONE(),
        0
    ) > 0,
SELECT
    USER_ID,
    CHANNEL,
    DID_CREATE_PROJECT,
    DID_INVITE,
    DID_SUBSCRIBE,
FROM FM.PRODANALYTICS
FOR CROSS
    USER_ID := BIND_VALUES(SEQUENCE(100, 105)),
    EVENT_ID := BIND_VALUES(SEQUENCE(1, 26)),
;
Result
FM.PRODANALYTICS.USER_ID BIGINTCHANNEL VARCHARDID_CREATE_PROJECT BOOLEANDID_INVITE BOOLEANDID_SUBSCRIBE VARCHAR
100organicTRUETRUETRUE
101paid_adFALSEFALSEFALSE
102organicTRUETRUETRUE
103referralTRUEFALSEFALSE
104paid_adTRUETRUETRUE
105organicFALSEFALSEFALSE

Which session was each user's most active?

This question goes deeper into array manipulation: enrich events with session IDs, group within the array by session, count events per session, then pick the top one. Two levels of .TRANSFORM() chained together.

The first .TRANSFORM(SELECT session_id, COUNT(*) GROUP BY session_id AS event_count ORDER BY event_count DESC) runs a group-by aggregation inside the enriched array — producing a summary array of ROW<session_id, event_count> per user. The second .TRANSFORM(SELECT ... ORDER BY ... LIMIT 1) picks the session with the most events.

User 104 stands out: all 5 of their events happened in a single session (1011), meaning they went from signup to paid subscription in one sitting. User 100 spread their 7 events across 3 sessions, with the signup session (1000) being the most active at 3 events.

The SQL equivalent would require UNNEST + JOIN + GROUP BY session_id + ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY count DESC) + re-aggregation. FeatureQL keeps the nested structure intact throughout.

FeatureQL
WITH
    USER_EVENTS := TABLES.AGG_USERS_OBT[events],
    EVENT_SESSION := TABLES.FCT_EVENTS[session_id],
    ENRICHED_EVENTS := EXTEND(
        ZIP(USER_EVENTS AS event_id)
        WITH EVENT_SESSION AS session_id
        VIA event_id BIND TO EVENT_ID
    ),
    SESSIONS_SUMMARY := ENRICHED_EVENTS.TRANSFORM(
        SELECT session_id, COUNT(*) GROUP BY session_id AS event_count
        ORDER BY event_count DESC, session_id
    ),
    NUM_SESSIONS := ARRAY_LENGTH(SESSIONS_SUMMARY),
    TOP_SESSION := SESSIONS_SUMMARY.TRANSFORM(
        SELECT session_id, event_count ORDER BY event_count DESC, session_id LIMIT 1
    ),
SELECT
    USER_ID,
    NUM_SESSIONS,
    TOP_SESSION,
FROM FM.PRODANALYTICS
FOR CROSS
    USER_ID := BIND_VALUES(SEQUENCE(100, 105)),
    EVENT_ID := BIND_VALUES(SEQUENCE(1, 26)),
;
Result
FM.PRODANALYTICS.USER_ID BIGINTNUM_SESSIONS BIGINTTOP_SESSION VARCHAR
1003[{session_id: 1000, event_count: 3}]
1012[{session_id: 1003, event_count: 2}]
1023[{session_id: 1005, event_count: 2}]
1033[{session_id: 1008, event_count: 2}]
1041[{session_id: 1011, event_count: 5}]
1051[{session_id: 1012, event_count: 2}]

Which acquisition channels drive the most revenue?

This shifts to a cross-entity aggregation: sum conversion revenue per user, then roll it up by acquisition channel. USER_REVENUE is a per-user feature (FK aggregation from conversions). CHANNEL_REVENUE and CONVERTER_COUNT are per-channel features (aggregation over users grouped by channel).

The ability to stack aggregations at different granularities in a single query — per-user revenue feeding into per-channel totals — is a case where FeatureQL's inline GROUP BY expressions eliminate the nested CTEs that SQL would require.

Organic users generated $78 across 2 converters (users 100 and 102). Paid ad produced $49 from 1 converter (user 104). The referral channel (user 103) hasn't converted yet.

FeatureQL
WITH
    CHANNEL := TABLES.DIM_USERS[acquisition_channel],
    USER_REVENUE := USER_ID.RELATED(
        SUM(TABLES.FCT_CONVERSIONS[revenue]) GROUP BY TABLES.FCT_CONVERSIONS[conversion_user_id]
    ),
    CHANNEL_REVENUE := SUM(USER_REVENUE) GROUP BY CHANNEL,
    CONVERTER_COUNT := COUNT_IF(USER_REVENUE IS NOT NULL::DECIMAL(10,2)) GROUP BY CHANNEL,
SELECT
    CHANNEL,
    CHANNEL_REVENUE,
    CONVERTER_COUNT,
FROM FM.PRODANALYTICS
FOR CROSS
    USER_ID := BIND_VALUES(SEQUENCE(100, 105)),
    CONVERSION_ID := BIND_VALUES(SEQUENCE(400, 402)),
WHERE CHANNEL_REVENUE IS NOT NULL::DECIMAL(10,2)
ORDER BY CHANNEL_REVENUE DESC
;
Result
CHANNEL VARCHARCHANNEL_REVENUE VARCHARCONVERTER_COUNT VARCHAR
organic78.002
paid_ad49.001

Classify users by engagement tier

This combines everything into a single classification. HAS_CONVERTED comes from the conversions entity, DID_CREATE_PROJECT comes from enriching and scanning the event array, and the CASE WHEN produces a label:

  • power_user: converted and created a project (users 100, 102, 104)
  • activated: created a project but hasn't converted yet (user 103)
  • dormant: hasn't even created a project (users 101, 105)

FeatureQL
WITH
    USER_NAME := TABLES.DIM_USERS[plan],
    NUM_EVENTS := USER_ID.RELATED(COUNT(1) GROUP BY TABLES.FCT_EVENTS[event_user_id]),
    CONVERSION_DATE := USER_ID.RELATED(
        MIN(TABLES.FCT_CONVERSIONS[conversion_ts]) GROUP BY TABLES.FCT_CONVERSIONS[conversion_user_id]
    ),
    HAS_CONVERTED := CONVERSION_DATE IS NOT NULL::TIMESTAMP,
    USER_EVENTS := TABLES.AGG_USERS_OBT[events],
    EVENT_TYPE := TABLES.FCT_EVENTS[event_type],
    ENRICHED_EVENTS := EXTEND(
        ZIP(USER_EVENTS AS event_id)
        WITH EVENT_TYPE AS event_type
        VIA event_id BIND TO EVENT_ID
    ),
    DID_CREATE_PROJECT := COALESCE(
        ENRICHED_EVENTS
            .TRANSFORM(SELECT COUNT(*) WHERE event_type = 'create_project')
            .UNWRAP_ONE(),
        0
    ) > 0,
    ENGAGEMENT_TIER := CASE
        WHEN HAS_CONVERTED AND DID_CREATE_PROJECT THEN 'power_user'
        WHEN DID_CREATE_PROJECT THEN 'activated'
        ELSE 'dormant'
    END,
SELECT
    USER_ID,
    USER_NAME,
    ENGAGEMENT_TIER,
    NUM_EVENTS,
    HAS_CONVERTED,
FROM FM.PRODANALYTICS
FOR CROSS
    USER_ID := BIND_VALUES(SEQUENCE(100, 105)),
    EVENT_ID := BIND_VALUES(SEQUENCE(1, 26)),
    CONVERSION_ID := BIND_VALUES(SEQUENCE(400, 402)),
;
Result
FM.PRODANALYTICS.USER_ID BIGINTUSER_NAME VARCHARENGAGEMENT_TIER VARCHARNUM_EVENTS BIGINTHAS_CONVERTED VARCHAR
100freepower_user7TRUE
101freedormant3FALSE
102propower_user5TRUE
103freeactivated4FALSE
104propower_user5TRUE
105freedormant2FALSE

A/B testing the nudge threshold

The product team wants to experiment with when to show an upgrade nudge to activated-but-unconverted users. The control nudges after 3+ events; the test variant raises the bar to 5+. Rather than duplicating the nudge logic for each arm, MACRO() defines it once with a parameterized threshold.

First, persist the engagement features so both experiment arms can reference them:

FeatureQL
CREATE OR REPLACE FEATURES IN FM.PRODANALYTICS AS
SELECT
    NUM_EVENTS := USER_ID.RELATED(COUNT(1) GROUP BY TABLES.FCT_EVENTS[event_user_id]),
    CONVERSION_DATE := USER_ID.RELATED(
        MIN(TABLES.FCT_CONVERSIONS[conversion_ts]) GROUP BY TABLES.FCT_CONVERSIONS[conversion_user_id]
    ),
    HAS_CONVERTED := CONVERSION_DATE IS NOT NULL::TIMESTAMP,
;
Result
feature_name VARCHARstatus VARCHARmessage VARCHAR
FM.PRODANALYTICS.NUM_EVENTSCREATEDFeature created as not exists
FM.PRODANALYTICS.CONVERSION_DATECREATEDFeature created as not exists
FM.PRODANALYTICS.HAS_CONVERTEDCREATEDFeature created as not exists

The macro show_nudge_macro accepts a threshold and produces a boolean: show the nudge if the user created a project, has not converted, and has accumulated at least threshold events. It's instantiated twice — show_nudge_macro(3) for control and show_nudge_macro(5) for test.

HASH01() provides deterministic bucketing. The same user always lands in the same arm, with no external randomization service needed. The salt string 'nudge_exp_v1' can be changed to re-randomize a new experiment.

In this dataset, only user 103 qualifies for the nudge (created a project, not converted, 4 events). Under control rules (threshold 3), they'd see the nudge. But if HASH01 places them in the test group (threshold 5), they don't — 4 events is below the test threshold.

FeatureQL
WITH
    threshold := INPUT(BIGINT),
    USER_EVENTS := TABLES.AGG_USERS_OBT[events],
    EVENT_TYPE := TABLES.FCT_EVENTS[event_type],
    ENRICHED_EVENTS := EXTEND(
        ZIP(USER_EVENTS AS event_id)
        WITH EVENT_TYPE AS event_type
        VIA event_id BIND TO EVENT_ID
    ),
    DID_CREATE_PROJECT := COALESCE(
        ENRICHED_EVENTS
            .TRANSFORM(SELECT COUNT(*) WHERE event_type = 'create_project')
            .UNWRAP_ONE(),
        0
    ) > 0,
    show_nudge_macro := MACRO(
        DID_CREATE_PROJECT AND NOT HAS_CONVERTED AND NUM_EVENTS >= threshold
        USING threshold
    ),
    SHOW_NUDGE_CONTROL := show_nudge_macro(3),
    SHOW_NUDGE_TEST := show_nudge_macro(5),
    IN_TEST_GROUP :=
        HASH01(UNSAFE_CAST(USER_ID AS VARCHAR) || 'nudge_exp_v1')
        BETWEEN.5 0e0 AND 0.5e0,
SELECT
    USER_ID,
    SHOW_NUDGE_CONTROL,
    IN_TEST_GROUP,
    SHOW_NUDGE := CASE
        WHEN IN_TEST_GROUP THEN SHOW_NUDGE_TEST
        ELSE SHOW_NUDGE_CONTROL
    END,
FROM FM.PRODANALYTICS
FOR CROSS
    USER_ID := BIND_VALUES(SEQUENCE(100, 105)),
    EVENT_ID := BIND_VALUES(SEQUENCE(1, 26)),
    CONVERSION_ID := BIND_VALUES(SEQUENCE(400, 402)),
;
Result
FM.PRODANALYTICS.USER_ID BIGINTSHOW_NUDGE_CONTROL BOOLEANIN_TEST_GROUP BOOLEANSHOW_NUDGE VARCHAR
100FALSEFALSEFALSE
101FALSETRUEFALSE
102FALSETRUEFALSE
103TRUETRUEFALSE
104FALSEFALSEFALSE
105FALSETRUEFALSE

The SQL equivalent would require duplicating the entire nudge condition for each arm, manually implementing hash-based bucketing with MD5 and bit manipulation, and joining three CTEs. The FeatureQL version defines the logic once and parameterizes what differs.

Putting it all together

This single query produces a complete user profile: plan and channel from the dimension table, event count and conversion status from RELATED(), activation milestones from EXTEND() + TRANSFORM(), session analysis from nested TRANSFORM() with GROUP BY, and engagement tier from CASE WHEN.

FeatureQL
WITH
    PLAN := TABLES.DIM_USERS[plan],
    CHANNEL := TABLES.DIM_USERS[acquisition_channel],
    SIGNUP_DATE := TABLES.DIM_USERS[signup_date],
    NUM_EVENTS := USER_ID.RELATED(COUNT(1) GROUP BY TABLES.FCT_EVENTS[event_user_id]),
    CONVERSION_DATE := USER_ID.RELATED(
        MIN(TABLES.FCT_CONVERSIONS[conversion_ts]) GROUP BY TABLES.FCT_CONVERSIONS[conversion_user_id]
    ),
    HAS_CONVERTED := CONVERSION_DATE IS NOT NULL::TIMESTAMP,
    DAYS_TO_CONVERT := CASE
        WHEN HAS_CONVERTED THEN DATE_DIFF(CONVERSION_DATE, SIGNUP_DATE::TIMESTAMP, 'day')
        ELSE NULL(BIGINT)
    END,
    USER_EVENTS := TABLES.AGG_USERS_OBT[events],
    EVENT_TYPE := TABLES.FCT_EVENTS[event_type],
    EVENT_SESSION := TABLES.FCT_EVENTS[session_id],
    ENRICHED_EVENTS := EXTEND(
        ZIP(USER_EVENTS AS event_id)
        WITH EVENT_TYPE AS event_type, EVENT_SESSION AS session_id
        VIA event_id BIND TO EVENT_ID
    ),
    DID_CREATE_PROJECT := COALESCE(
        ENRICHED_EVENTS
            .TRANSFORM(SELECT COUNT(*) WHERE event_type = 'create_project')
            .UNWRAP_ONE(),
        0
    ) > 0,
    DID_INVITE := COALESCE(
        ENRICHED_EVENTS
            .TRANSFORM(SELECT COUNT(*) WHERE event_type = 'invite_teammate')
            .UNWRAP_ONE(),
        0
    ) > 0,
    DID_SUBSCRIBE := COALESCE(
        ENRICHED_EVENTS
            .TRANSFORM(SELECT COUNT(*) WHERE event_type = 'subscribe')
            .UNWRAP_ONE(),
        0
    ) > 0,
    SESSIONS_SUMMARY := ENRICHED_EVENTS.TRANSFORM(
        SELECT session_id, COUNT(*) GROUP BY session_id AS event_count
        ORDER BY event_count DESC, session_id
    ),
    TOP_SESSION := SESSIONS_SUMMARY.TRANSFORM(
        SELECT session_id, event_count ORDER BY event_count DESC, session_id LIMIT 1
    ),
    ENGAGEMENT_TIER := CASE
        WHEN HAS_CONVERTED AND DID_CREATE_PROJECT THEN 'power_user'
        WHEN DID_CREATE_PROJECT THEN 'activated'
        ELSE 'dormant'
    END,
SELECT
    USER_ID,
    PLAN,
    CHANNEL,
    NUM_EVENTS,
    HAS_CONVERTED,
    DAYS_TO_CONVERT,
    DID_CREATE_PROJECT,
    DID_INVITE,
    DID_SUBSCRIBE,
    TOP_SESSION,
    ENGAGEMENT_TIER,
FROM FM.PRODANALYTICS
FOR CROSS
    USER_ID := BIND_VALUES(SEQUENCE(100, 105)),
    EVENT_ID := BIND_VALUES(SEQUENCE(1, 26)),
    CONVERSION_ID := BIND_VALUES(SEQUENCE(400, 402)),
;
Result
FM.PRODANALYTICS.USER_ID BIGINTPLAN VARCHARCHANNEL VARCHARNUM_EVENTS BIGINTHAS_CONVERTED BOOLEANDAYS_TO_CONVERT BIGINTDID_CREATE_PROJECT BOOLEANDID_INVITE BOOLEANDID_SUBSCRIBE BOOLEANTOP_SESSION VARCHARENGAGEMENT_TIER VARCHAR
100freeorganic7TRUE4TRUETRUETRUE[{session_id: 1000, event_count: 3}]power_user
101freepaid_ad3FALSENULLFALSEFALSEFALSE[{session_id: 1003, event_count: 2}]dormant
102proorganic5TRUE2TRUETRUETRUE[{session_id: 1005, event_count: 2}]power_user
103freereferral4FALSENULLTRUEFALSEFALSE[{session_id: 1008, event_count: 2}]activated
104propaid_ad5TRUE0TRUETRUETRUE[{session_id: 1011, event_count: 5}]power_user
105freeorganic2FALSENULLFALSEFALSEFALSE[{session_id: 1012, event_count: 2}]dormant

Every feature in this query was built and tested independently in the previous sections. The combined query just references them together — FeatureQL resolves the dependency graph and generates the necessary joins and aggregations automatically.

What's next

Last update at: 2026/04/27 15:40:31