1- """Translate the connector's ``AuthProvider`` into ``databricks_sql_kernel``
2- ``Session`` auth kwargs.
3-
4- This phase ships PAT only. The kernel-side PyO3 binding accepts
5- ``auth_type='pat'``; OAuth / federation / custom credentials
6- providers are reserved but not yet wired in either layer. Non-PAT
7- auth raises ``NotSupportedError`` from this bridge so the failure
8- surfaces at session-open time with a clear message rather than
9- deep inside the kernel.
10-
11- Token extraction goes through ``AuthProvider.add_headers({})``
12- rather than touching auth-provider-specific attributes, so the
13- bridge works uniformly for every PAT shape — including
14- ``AccessTokenAuthProvider`` wrapped in ``TokenFederationProvider``
15- (which ``get_python_sql_connector_auth_provider`` does for every
16- provider it builds).
1+ """Translate the connector's auth configuration into
2+ ``databricks_sql_kernel`` ``Session`` auth kwargs.
3+
4+ Three auth shapes are supported on the kernel path:
5+
6+ - **PAT** — extracted from the built ``AuthProvider`` (works for
7+ ``AccessTokenAuthProvider``, including the ``TokenFederationProvider``
8+ wrapper that ``get_python_sql_connector_auth_provider`` always
9+ applies). Maps to the kernel's ``auth_type='pat'``.
10+ - **OAuth M2M** — when the caller passes ``oauth_client_id`` +
11+ ``oauth_client_secret``, the *raw* credentials are forwarded to the
12+ kernel's ``auth_type='oauth-m2m'`` and the kernel owns the full
13+ token lifecycle (acquire + refresh via workspace OIDC
14+ client-credentials). We forward the raw pair rather than reusing the
15+ connector's own OAuth provider because the kernel re-mints tokens
16+ itself and the client secret is not recoverable from a built
17+ provider.
18+ - **OAuth U2M** — for ``auth_type`` ``databricks-oauth`` /
19+ ``azure-oauth`` (the browser authorization-code flow), the optional
20+ ``oauth_client_id`` / ``oauth_redirect_port`` are forwarded to the
21+ kernel's ``auth_type='oauth-u2m'`` and the kernel runs the browser
22+ flow itself.
23+
24+ A user-supplied custom ``credentials_provider`` is **rejected** on the
25+ kernel path with ``NotSupportedError``: it's an opaque token source
26+ with no extractable raw credentials, so the kernel can't own the
27+ lifecycle. Such callers should pass ``oauth_client_id`` /
28+ ``oauth_client_secret`` (M2M) instead. Anything else non-PAT also
29+ raises ``NotSupportedError`` so the failure surfaces at session-open
30+ with a clear message rather than deep inside the kernel.
31+
32+ The M2M / U2M decisions are driven by the *raw* connect() kwargs
33+ (``auth_options``), not a built ``AuthProvider``. On the kernel path
34+ the connector deliberately does **not** build its own OAuth provider
35+ (that would eagerly run the U2M browser flow / M2M token exchange at
36+ connect() time, before the kernel is consulted), so ``auth_provider``
37+ is either a minimal PAT provider or ``None`` and the OAuth credentials
38+ are available only from the raw kwargs.
1739"""
1840
1941from __future__ import annotations
4668_TOKEN_REJECT_RE = re .compile (r"[\x00-\x20\x7f]" )
4769
4870
49- def _is_pat (auth_provider : AuthProvider ) -> bool :
71+ def _is_pat (auth_provider : Optional [ AuthProvider ] ) -> bool :
5072 """Return True iff this provider ultimately wraps an
5173 ``AccessTokenAuthProvider``.
5274
@@ -65,18 +87,20 @@ def _is_pat(auth_provider: AuthProvider) -> bool:
6587 return False
6688
6789
68- def _extract_bearer_token (auth_provider : AuthProvider ) -> Optional [str ]:
90+ def _extract_bearer_token (auth_provider : Optional [ AuthProvider ] ) -> Optional [str ]:
6991 """Pull the current bearer token out of an ``AuthProvider``.
7092
7193 The connector's ``AuthProvider.add_headers`` mutates a header
7294 dict and writes the ``Authorization: Bearer <token>`` value.
7395 Going through that public surface keeps us insulated from
7496 provider-specific internals.
7597
76- Returns ``None`` if the provider did not write an Authorization
77- header or wrote a non-Bearer scheme — neither is representable
78- in the kernel's PAT auth surface.
98+ Returns ``None`` if there is no provider, the provider did not
99+ write an Authorization header, or it wrote a non-Bearer scheme —
100+ none of which is representable in the kernel's PAT auth surface.
79101 """
102+ if auth_provider is None :
103+ return None
80104 headers : Dict [str , str ] = {}
81105 auth_provider .add_headers (headers )
82106 auth = headers .get ("Authorization" )
@@ -93,15 +117,83 @@ def _extract_bearer_token(auth_provider: AuthProvider) -> Optional[str]:
93117 return token
94118
95119
96- def kernel_auth_kwargs (auth_provider : AuthProvider ) -> Dict [str , Any ]:
120+ def kernel_auth_kwargs (
121+ auth_provider : Optional [AuthProvider ],
122+ auth_options : Optional [Dict [str , Any ]] = None ,
123+ ) -> Dict [str , Any ]:
97124 """Build the kwargs passed to ``databricks_sql_kernel.Session(...)``.
98125
99- PAT (including ``TokenFederationProvider``-wrapped PAT) routes
100- through the kernel's PAT path. Anything else raises
101- ``NotSupportedError`` — the kernel binding doesn't accept OAuth
102- today, and routing OAuth through PAT would silently break
103- token refresh during long-running sessions.
126+ ``auth_options`` carries the raw connect() kwargs relevant to auth
127+ (``auth_type``, ``oauth_client_id``, ``oauth_client_secret``,
128+ ``oauth_redirect_port``, ``credentials_provider``). They drive the
129+ OAuth decisions because the OAuth secret is consumed during
130+ ``AuthProvider`` construction and can't be read back off the built
131+ provider.
132+
133+ Resolution order:
134+
135+ 0. **Ambiguity guards** — reject conflicting auth signals *before*
136+ resolving, so an ambiguous request fails loudly at session-open
137+ rather than silently picking one flow (and failing later as a
138+ confusing 401 against the wrong principal):
139+ - a custom ``credentials_provider`` *and* M2M kwargs together;
140+ - a U2M ``auth_type`` (``databricks-oauth`` / ``azure-oauth``)
141+ *and* ``oauth_client_secret`` together.
142+ 1. **OAuth M2M** — ``oauth_client_id`` + ``oauth_client_secret``
143+ both present → forward raw creds to the kernel's ``oauth-m2m``.
144+ 2. **PAT** — the built provider is (or wraps) an
145+ ``AccessTokenAuthProvider`` → extract the bearer token.
146+ 3. **OAuth U2M** — ``auth_type`` is ``databricks-oauth`` /
147+ ``azure-oauth`` → forward optional ``oauth_client_id`` /
148+ ``oauth_redirect_port`` to the kernel's ``oauth-u2m``.
149+ 4. **Custom credentials_provider** → ``NotSupportedError`` (opaque
150+ token source; no raw creds for the kernel to own).
151+ 5. Anything else → ``NotSupportedError``.
152+
153+ M2M is checked before PAT so that a workload passing both an
154+ access token *and* M2M creds resolves to the (refreshing) M2M path
155+ rather than a static token. (Token + M2M is not treated as
156+ ambiguous: a PAT is often present as ambient config the caller
157+ didn't intend as the primary credential, whereas an explicit
158+ ``oauth_client_secret`` is unambiguous M2M intent.)
104159 """
160+ opts = auth_options or {}
161+
162+ client_id = opts .get ("oauth_client_id" )
163+ client_secret = opts .get ("oauth_client_secret" )
164+ auth_type = opts .get ("auth_type" )
165+ has_m2m = bool (client_id and client_secret )
166+
167+ # 0. Ambiguity guards — fail before any flow is chosen.
168+ if client_secret and opts .get ("credentials_provider" ) is not None :
169+ raise NotSupportedError (
170+ "Ambiguous auth on use_kernel=True: both a custom "
171+ "credentials_provider and oauth_client_secret were provided. "
172+ "Pass exactly one — oauth_client_id + oauth_client_secret for "
173+ "kernel-managed M2M, or use the Thrift backend (default) for "
174+ "credentials_provider."
175+ )
176+ if client_secret and auth_type in ("databricks-oauth" , "azure-oauth" ):
177+ raise NotSupportedError (
178+ f"Ambiguous auth on use_kernel=True: auth_type={ auth_type !r} selects "
179+ "the U2M browser flow, but oauth_client_secret was also provided "
180+ "(machine-to-machine). Drop oauth_client_secret for U2M, or drop "
181+ "auth_type for M2M."
182+ )
183+
184+ # 1. OAuth M2M — raw client-credentials pair forwarded to the kernel.
185+ if has_m2m :
186+ kwargs : Dict [str , Any ] = {
187+ "auth_type" : "oauth-m2m" ,
188+ "client_id" : client_id ,
189+ "client_secret" : client_secret ,
190+ }
191+ scopes = _normalize_scopes (opts .get ("oauth_scopes" ))
192+ if scopes is not None :
193+ kwargs ["oauth_scopes" ] = scopes
194+ return kwargs
195+
196+ # 2. PAT (including TokenFederationProvider-wrapped PAT).
105197 if _is_pat (auth_provider ):
106198 token = _extract_bearer_token (auth_provider )
107199 if not token :
@@ -111,8 +203,66 @@ def kernel_auth_kwargs(auth_provider: AuthProvider) -> Dict[str, Any]:
111203 )
112204 return {"auth_type" : "pat" , "access_token" : token }
113205
206+ # 3. OAuth U2M — browser authorization-code flow; the kernel runs it.
207+ if auth_type in ("databricks-oauth" , "azure-oauth" ):
208+ kwargs = {"auth_type" : "oauth-u2m" }
209+ if client_id :
210+ kwargs ["client_id" ] = client_id
211+ redirect_port = opts .get ("oauth_redirect_port" )
212+ if redirect_port is not None :
213+ kwargs ["redirect_port" ] = int (redirect_port )
214+ scopes = _normalize_scopes (opts .get ("oauth_scopes" ))
215+ if scopes is not None :
216+ kwargs ["oauth_scopes" ] = scopes
217+ return kwargs
218+
219+ # 4. Custom credentials_provider — the connector's primary M2M path
220+ # on Thrift/SEA, but unusable on the kernel: it's an opaque token
221+ # source with no extractable client_id/secret, so the kernel
222+ # can't own the token lifecycle. Point the caller at the raw
223+ # M2M kwargs instead.
224+ if opts .get ("credentials_provider" ) is not None :
225+ raise NotSupportedError (
226+ "use_kernel=True does not support a custom credentials_provider. "
227+ "For OAuth machine-to-machine auth, pass oauth_client_id and "
228+ "oauth_client_secret so the kernel can manage the token lifecycle "
229+ "directly; or use the Thrift backend (default) with "
230+ "credentials_provider."
231+ )
232+
233+ # 5. Everything else (including no usable credentials at all —
234+ # ``auth_provider`` is None on the kernel path when no access
235+ # token was supplied and no OAuth kwargs resolved above).
236+ provider_desc = (
237+ type (auth_provider ).__name__ if auth_provider is not None else "no credentials"
238+ )
114239 raise NotSupportedError (
115- f"The kernel backend (use_kernel=True) currently only supports PAT auth, "
116- f"but got { type (auth_provider ).__name__ } . Use the Thrift backend "
117- "(default) for OAuth / federation / custom credential providers."
240+ f"use_kernel=True requires PAT (access_token), OAuth M2M "
241+ f"(oauth_client_id + oauth_client_secret), or OAuth U2M "
242+ f"(auth_type='databricks-oauth' / 'azure-oauth'), but got "
243+ f"{ provider_desc } with auth_type={ auth_type !r} . Use the Thrift "
244+ "backend (default) for other auth flows."
245+ )
246+
247+
248+ def _normalize_scopes (scopes : Any ) -> Optional [list ]:
249+ """Normalise an ``oauth_scopes`` value to a list of strings, or
250+ ``None`` to let the kernel apply its defaults.
251+
252+ Accepts a list/tuple of strings or a single space-delimited string
253+ (the shape ``DatabricksOAuthProvider`` stores internally)."""
254+ if scopes is None :
255+ return None
256+ if isinstance (scopes , str ):
257+ parts = scopes .split ()
258+ return parts or None
259+ if isinstance (scopes , (list , tuple )):
260+ parts = [str (s ) for s in scopes if s ]
261+ return parts or None
262+ # Anything else (int, dict, bool, …) is a caller error. Fail loudly
263+ # rather than silently dropping the scopes to None and surprising
264+ # the user with default scopes.
265+ raise ProgrammingError (
266+ f"oauth_scopes must be a list/tuple of strings or a space-delimited "
267+ f"string, got { type (scopes ).__name__ } ."
118268 )
0 commit comments