AI-система предсказания расширения аккаунтов
Account expansion — это рост выручки от существующих клиентов за счёт upsell и cross-sell. В B2B SaaS его измеряют метрикой Net Revenue Retention (NRR): у сильных компаний NRR превышает 120%, то есть даже без единой новой сделки выручка растёт более чем на 20% в год. AI-модель определяет, какие аккаунты готовы к расширению и почему.
Модель вероятности расширения (expansion)
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
import shap
from anthropic import Anthropic
import json
class AccountExpansionPredictor:
    """Score existing accounts by how ready they are for upsell / cross-sell.

    The class builds per-account features from account, product-usage and
    support data, scores them with a gradient-boosting classifier, attaches a
    heuristic expansion value and product recommendation, and can ask an LLM
    to draft a short brief for the account manager.

    NOTE: ``self.model`` is created unfitted and no training method is visible
    in this class. Fit it on the feature columns produced by
    ``build_account_features`` before calling
    ``predict_expansion_opportunities``.
    """

    # Defaults used when an optional ``accounts`` column is absent.
    DEFAULT_DAYS_AS_CUSTOMER = 180
    DEFAULT_PLAN_TIER = 1  # 1=basic, 2=pro, 3=enterprise
    DEFAULT_CONTRACT_MONTHS_REMAINING = 12
    DEFAULT_ARR_USD = 10000
    # CSAT assumed for accounts without any support tickets.
    DEFAULT_CSAT = 3.5

    # Heuristic pricing assumptions for the expansion value estimate.
    SEAT_PRICE_USD_PER_MONTH = 50  # $50/seat/month
    PLAN_UPGRADE_ARR_SHARE = 0.5  # a plan upgrade is valued at 50% of ARR

    DEFAULT_LLM_MODEL = "claude-3-5-sonnet-20241022"
    DEFAULT_LLM_MAX_TOKENS = 200

    def __init__(self, llm_model: str = DEFAULT_LLM_MODEL,
                 llm_max_tokens: int = DEFAULT_LLM_MAX_TOKENS) -> None:
        """Create the (unfitted) classifier and the LLM client.

        Args:
            llm_model: Model identifier passed to the Anthropic Messages API.
            llm_max_tokens: Upper bound on the length of a generated brief.
        """
        self.model = GradientBoostingClassifier(
            n_estimators=200, learning_rate=0.05, max_depth=4, random_state=42
        )
        self.llm = Anthropic()
        self.llm_model = llm_model
        self.llm_max_tokens = llm_max_tokens

    @staticmethod
    def _values_or_default(frame: pd.DataFrame, column: str, default,
                           index: pd.Index) -> pd.Series:
        """Return ``frame[column]`` re-labelled with ``index``, or ``default`` on every row.

        Values are taken positionally, so ``frame`` must have exactly
        ``len(index)`` rows (pandas raises ``ValueError`` otherwise). This
        avoids silent NaNs when ``frame`` carries a different index, and it
        broadcasts the default to all rows instead of only the first one.
        """
        if column in frame.columns:
            return pd.Series(frame[column].to_numpy(), index=index)
        return pd.Series(default, index=index)

    def build_account_features(self, accounts: pd.DataFrame,
                               usage_data: pd.DataFrame,
                               support_data: pd.DataFrame) -> pd.DataFrame:
        """Build one feature row per row of ``accounts``.

        Args:
            accounts: Must contain ``account_id``. Optional columns:
                ``days_since_first_purchase``, ``plan_tier``, ``active_users``,
                ``licensed_seats``, ``contract_months_remaining``; class-level
                defaults are used for any that are missing.
            usage_data: Usage events with columns ``account_id``, ``user_id``,
                ``feature_name``, ``session_id``, ``is_advanced_feature``,
                ``date`` (compared against a naive ``pd.Timestamp.now()``) and
                ``sessions_count``.
            support_data: Tickets with columns ``account_id``, ``ticket_id``,
                ``csat_score`` and ``priority``.

        Returns:
            A DataFrame with a fresh 0..n-1 index whose rows are in the same
            order as ``accounts``; remaining NaNs are replaced with 0.
        """
        # Reset the index so that row i here always corresponds to row i of
        # ``accounts``; the merges below also produce a 0..n-1 index.
        features = accounts[['account_id']].reset_index(drop=True)

        # === Product Usage Signals ===
        usage = usage_data.groupby('account_id').agg(
            monthly_active_users=('user_id', 'nunique'),
            feature_breadth=('feature_name', 'nunique'),
            total_sessions=('session_id', 'count'),
            advanced_features_used=('is_advanced_feature', 'sum'),
        )
        # Sessions are normalised by distinct users so the feature matches its
        # name; an account with no identifiable users gets NaN (0 at the end).
        usage['sessions_per_user'] = (
            usage['total_sessions']
            / usage['monthly_active_users'].replace(0, np.nan)
        )
        usage = usage[[
            'monthly_active_users', 'feature_breadth',
            'sessions_per_user', 'advanced_features_used',
        ]]
        features = features.merge(usage.reset_index(), on='account_id', how='left')

        # Usage trend: last 3 months compared with the 3 months before that.
        # The clock is read once so both windows share the same boundaries.
        now = pd.Timestamp.now()
        recent_start = now - pd.DateOffset(months=3)
        older_start = now - pd.DateOffset(months=6)
        event_dates = usage_data['date']
        recent_counts = (
            usage_data[event_dates >= recent_start]
            .groupby('account_id')['session_id'].count()
        )
        older_counts = (
            usage_data[(event_dates < recent_start) & (event_dates >= older_start)]
            .groupby('account_id')['session_id'].count()
        )
        # An account missing from one window had zero sessions there; without
        # this the subtraction yields NaN and the trend is silently lost.
        seen_ids = recent_counts.index.union(older_counts.index)
        recent_counts = recent_counts.reindex(seen_ids, fill_value=0)
        older_counts = older_counts.reindex(seen_ids, fill_value=0)
        # +1 in the denominator avoids division by zero for new accounts.
        usage_trend = (recent_counts - older_counts) / (older_counts + 1)
        features['usage_trend_3m'] = features['account_id'].map(usage_trend).fillna(0)

        # === Account Health ===
        row_index = features.index
        features['days_as_customer'] = self._values_or_default(
            accounts, 'days_since_first_purchase',
            self.DEFAULT_DAYS_AS_CUSTOMER, row_index,
        )
        features['current_plan_tier'] = self._values_or_default(
            accounts, 'plan_tier', self.DEFAULT_PLAN_TIER, row_index,
        )
        active_users = self._values_or_default(accounts, 'active_users', 1, row_index)
        licensed_seats = self._values_or_default(accounts, 'licensed_seats', 1, row_index)
        features['seats_utilization'] = (active_users / licensed_seats).clip(0, 1)
        features['contract_months_remaining'] = self._values_or_default(
            accounts, 'contract_months_remaining',
            self.DEFAULT_CONTRACT_MONTHS_REMAINING, row_index,
        )

        # === Support & Satisfaction ===
        # NOTE(review): no date filter is applied to tickets here; the
        # ``_3m`` suffix suggests the caller passes only the last 3 months of
        # tickets -- confirm against the caller.
        support = support_data.groupby('account_id').agg(
            support_tickets_3m=('ticket_id', 'count'),
            avg_csat=('csat_score', 'mean'),
            has_critical_tickets=('priority', lambda p: int((p == 'critical').any())),
        )
        features = features.merge(support.reset_index(), on='account_id', how='left')
        features['support_tickets_3m'] = features['support_tickets_3m'].fillna(0)
        features['avg_csat'] = features['avg_csat'].fillna(self.DEFAULT_CSAT)

        # === Expansion Readiness Signals ===
        features['seats_at_capacity'] = (features['seats_utilization'] > 0.90).astype(int)
        # Power users: distinct users on rows whose ``sessions_count`` is above
        # the 90th percentile of all usage rows.
        power_threshold = usage_data['sessions_count'].quantile(0.90)
        power_users = (
            usage_data[usage_data['sessions_count'] > power_threshold]
            .groupby('account_id')['user_id'].nunique()
        )
        features['power_user_count'] = (
            features['account_id'].map(power_users).fillna(0).to_numpy()
        )

        return features.fillna(0)

    def predict_expansion_opportunities(self, accounts: pd.DataFrame,
                                        usage_data: pd.DataFrame,
                                        support_data: pd.DataFrame) -> pd.DataFrame:
        """Rank accounts by expansion priority.

        Builds features, scores them with ``self.model`` (which must already
        be fitted), and adds ``expansion_probability``,
        ``expansion_potential_usd``, ``recommended_product`` and
        ``priority_score`` columns.

        Returns:
            The feature frame sorted by ``priority_score`` in descending order.
        """
        features = self.build_account_features(accounts, usage_data, support_data)
        # Capture the model inputs before any output columns are appended.
        feature_cols = [col for col in features.columns if col != 'account_id']
        model_input = features[feature_cols]

        features['expansion_probability'] = self.model.predict_proba(model_input)[:, 1]
        features['expansion_potential_usd'] = self._estimate_expansion_value(features, accounts)
        features['recommended_product'] = self._recommend_expansion_product(features)

        # Prioritisation for the sales team: probability weighted by the log
        # of the deal size, so very large deals do not dominate the ranking.
        features['priority_score'] = (
            features['expansion_probability']
            * np.log1p(features['expansion_potential_usd'])
        )
        return features.sort_values('priority_score', ascending=False)

    def _estimate_expansion_value(self, features: pd.DataFrame,
                                  accounts: pd.DataFrame) -> pd.Series:
        """Estimate the potential additional ARR (USD) per account.

        Row i of ``features`` must correspond to row i of ``accounts``, as
        produced by ``build_account_features``.

        Returns:
            A Series indexed like ``features``: annualised seat expansion plus
            plan-upgrade potential, with NaN replaced by 0.
        """
        row_index = features.index
        base_arr = self._values_or_default(
            accounts, 'current_arr', self.DEFAULT_ARR_USD, row_index,
        )
        at_capacity = self._values_or_default(features, 'seats_at_capacity', 0, row_index)
        power_users = self._values_or_default(features, 'power_user_count', 0, row_index)
        advanced_used = self._values_or_default(features, 'advanced_features_used', 0, row_index)
        plan_tier = self._values_or_default(
            features, 'current_plan_tier', self.DEFAULT_PLAN_TIER, row_index,
        )

        # Seats expansion: one extra seat per power user, only for accounts
        # that are already at seat capacity.
        monthly_seat_revenue = at_capacity * power_users * self.SEAT_PRICE_USD_PER_MONTH

        # Plan upgrade: basic-tier accounts that already use many advanced features.
        upgrade_candidate = (advanced_used > 5) & (plan_tier < 2)
        plan_upgrade_potential = (
            upgrade_candidate.astype(float) * base_arr * self.PLAN_UPGRADE_ARR_SHARE
        )

        return (monthly_seat_revenue * 12 + plan_upgrade_potential).fillna(0)

    def _recommend_expansion_product(self, features: pd.DataFrame) -> pd.Series:
        """Pick the product to pitch for each account.

        Rules are applied in order and later rules override earlier ones, so
        the effective precedence is ``plan_upgrade`` > ``feature_add_on`` >
        ``seat_expansion`` > ``general_expansion`` (the fallback).
        NOTE(review): this precedence is kept as found -- confirm it is the
        intended priority.
        """
        row_index = features.index
        rules = [
            ('seat_expansion',
             self._values_or_default(features, 'seats_at_capacity', 0, row_index) > 0),
            ('feature_add_on',
             self._values_or_default(features, 'feature_breadth', 0, row_index) < 5),
            ('plan_upgrade',
             self._values_or_default(
                 features, 'current_plan_tier', self.DEFAULT_PLAN_TIER, row_index) == 1),
        ]
        recommendation = pd.Series('general_expansion', index=row_index, dtype=object)
        for product, applies in rules:
            recommendation = recommendation.mask(applies, product)
        return recommendation

    def generate_expansion_brief(self, account: dict) -> str:
        """Ask the LLM for a short Russian-language brief for the account manager.

        Args:
            account: Mapping with the account's name, ARR and the signal /
                prediction fields referenced in the prompt below. Missing,
                ``None`` or NaN numeric fields are rendered as 0.

        Returns:
            The text of the first content block of the LLM response.
        """
        def number(key: str):
            # Formatting None with ``:,.0f`` raises and NaN prints "nan";
            # both are treated as "no data" and shown as 0.
            value = account.get(key)
            if value is None or pd.isna(value):
                return 0
            return value

        prompt = "\n".join([
            "Write a sales brief for account expansion in Russian.",
            f"Account: {account.get('company_name')}",
            f"Current ARR: ${number('current_arr'):,.0f}",
            f"Expansion probability: {number('expansion_probability'):.0%}",
            "Key signals:",
            f"- Seats utilization: {number('seats_utilization'):.0%}",
            f"- Usage trend: {number('usage_trend_3m'):+.0%}",
            f"- Advanced features used: {account.get('advanced_features_used', 0)}",
            f"- Power users: {account.get('power_user_count', 0)}",
            f"Recommended expansion: {account.get('recommended_product', '')}",
            f"Estimated value: ${number('expansion_potential_usd'):,.0f} ARR",
            "Write 2-3 sentences: what signals you see, what to propose, "
            "and how to frame the conversation.",
        ])
        response = self.llm.messages.create(
            model=self.llm_model,
            max_tokens=self.llm_max_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.content[0].text
Предиктивный подход к account expansion обычно увеличивает NRR на 5–15 процентных пунктов: команда CS фокусируется на топ-20% аккаунтов с реальным потенциалом вместо равномерного распределения усилий. Минимальный датасет для обучения модели: 200+ событий расширения (expansion) за 12+ месяцев.







