-
Notifications
You must be signed in to change notification settings - Fork 394
add auth login failure telemetry #3305
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -302,7 +302,10 @@ impl BuilderIdToken { | |
| } | ||
|
|
||
| /// Load the token from the keychain, refresh the token if it is expired and return it | ||
| pub async fn load(database: &Database) -> Result<Option<Self>, AuthError> { | ||
| pub async fn load( | ||
| database: &Database, | ||
| telemetry: Option<&crate::telemetry::TelemetryThread>, | ||
| ) -> Result<Option<Self>, AuthError> { | ||
| // Can't use #[cfg(test)] without breaking lints, and we don't want to require | ||
| // authentication in order to run ChatSession tests. Hence, adding this here with cfg!(test) | ||
| if cfg!(test) { | ||
|
|
@@ -328,7 +331,7 @@ impl BuilderIdToken { | |
|
|
||
| if token.is_expired() { | ||
| trace!("token is expired, refreshing"); | ||
| token.refresh_token(&client, database, ®ion).await | ||
| token.refresh_token(&client, database, ®ion, telemetry).await | ||
| } else { | ||
| trace!(?token, "found a valid token"); | ||
| Ok(Some(token)) | ||
|
|
@@ -357,6 +360,7 @@ impl BuilderIdToken { | |
| client: &Client, | ||
| database: &Database, | ||
| region: &Region, | ||
| telemetry: Option<&crate::telemetry::TelemetryThread>, | ||
| ) -> Result<Option<Self>, AuthError> { | ||
| let Some(refresh_token) = &self.refresh_token else { | ||
| warn!("no refresh token was found"); | ||
|
|
@@ -416,6 +420,25 @@ impl BuilderIdToken { | |
| let display_err = DisplayErrorContext(&err); | ||
| error!("Failed to refresh builder id access token: {}", display_err); | ||
|
|
||
| // Send telemetry for refresh failure | ||
| if let Some(telemetry) = telemetry { | ||
| let auth_method = match self.token_type() { | ||
| TokenType::BuilderId => "BuilderId", | ||
| TokenType::IamIdentityCenter => "IdentityCenter", | ||
| }; | ||
| let oauth_flow = match self.oauth_flow { | ||
| OAuthFlow::DeviceCode => "DeviceCode", | ||
| OAuthFlow::Pkce => "PKCE", | ||
| }; | ||
| let error_code = match &err { | ||
| SdkError::ServiceError(service_err) => service_err.err().meta().code().map(|s| s.to_string()), | ||
| _ => None, | ||
| }; | ||
| telemetry | ||
| .send_auth_failed(auth_method, oauth_flow, "TokenRefresh", error_code) | ||
| .ok(); | ||
| } | ||
|
|
||
| // if the error is the client's fault, clear the token | ||
| if let SdkError::ServiceError(service_err) = &err { | ||
| if !service_err.err().is_slow_down_exception() { | ||
|
|
@@ -471,11 +494,7 @@ impl BuilderIdToken { | |
| } | ||
|
|
||
| pub fn token_type(&self) -> TokenType { | ||
| match &self.start_url { | ||
| Some(url) if url == START_URL => TokenType::BuilderId, | ||
| None => TokenType::BuilderId, | ||
| Some(_) => TokenType::IamIdentityCenter, | ||
| } | ||
| token_type_from_start_url(self.start_url.as_deref()) | ||
| } | ||
|
|
||
| /// Check if the token is for the internal amzn start URL (`https://amzn.awsapps.com/start`), | ||
|
|
@@ -486,6 +505,14 @@ impl BuilderIdToken { | |
| } | ||
| } | ||
|
|
||
| pub fn token_type_from_start_url(start_url: Option<&str>) -> TokenType { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe just implement an
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are right. Will take care of this. |
||
| match start_url { | ||
| Some(url) if url == START_URL => TokenType::BuilderId, | ||
| None => TokenType::BuilderId, | ||
| Some(_) => TokenType::IamIdentityCenter, | ||
| } | ||
| } | ||
|
|
||
| pub enum PollCreateToken { | ||
| Pending, | ||
| Complete, | ||
|
|
@@ -498,6 +525,7 @@ pub async fn poll_create_token( | |
| device_code: String, | ||
| start_url: Option<String>, | ||
| region: Option<String>, | ||
| telemetry: &crate::telemetry::TelemetryThread, | ||
| ) -> PollCreateToken { | ||
| let region = region.clone().map_or(OIDC_BUILDER_ID_REGION, Region::new); | ||
| let client = client(region.clone()); | ||
|
|
@@ -538,6 +566,20 @@ pub async fn poll_create_token( | |
| }, | ||
| Err(err) => { | ||
| error!(?err, "Failed to poll for builder id token"); | ||
|
|
||
| // Send telemetry for device code failure | ||
| let auth_method = match token_type_from_start_url(start_url.as_deref()) { | ||
| TokenType::BuilderId => "BuilderId", | ||
| TokenType::IamIdentityCenter => "IdentityCenter", | ||
| }; | ||
| let error_code = match &err { | ||
| SdkError::ServiceError(service_err) => service_err.err().meta().code().map(|s| s.to_string()), | ||
| _ => None, | ||
| }; | ||
| telemetry | ||
| .send_auth_failed(auth_method, "DeviceCode", "NewLogin", error_code) | ||
| .ok(); | ||
|
|
||
| PollCreateToken::Error(err.into()) | ||
| }, | ||
| } | ||
|
|
@@ -550,7 +592,7 @@ pub async fn is_logged_in(database: &mut Database) -> bool { | |
| return true; | ||
| } | ||
|
|
||
| match BuilderIdToken::load(database).await { | ||
| match BuilderIdToken::load(database, None).await { | ||
| Ok(Some(_)) => true, | ||
| Ok(None) => { | ||
| info!("not logged in - no valid token found"); | ||
|
|
@@ -585,7 +627,7 @@ pub async fn logout(database: &mut Database) -> Result<(), AuthError> { | |
| pub async fn get_start_url_and_region(database: &Database) -> (Option<String>, Option<String>) { | ||
| // NOTE: Database provides direct methods to access the start_url and region, but they are not | ||
| // guaranteed to be up to date in the chat session. Example: login is changed mid-chat session. | ||
| let token = BuilderIdToken::load(database).await; | ||
| let token = BuilderIdToken::load(database, None).await; | ||
| match token { | ||
| Ok(Some(t)) => (t.start_url, t.region), | ||
| _ => (None, None), | ||
|
|
@@ -603,7 +645,7 @@ impl ResolveIdentity for BearerResolver { | |
| ) -> IdentityFuture<'a> { | ||
| IdentityFuture::new_boxed(Box::pin(async { | ||
| let database = Database::new().await?; | ||
| match BuilderIdToken::load(&database).await? { | ||
| match BuilderIdToken::load(&database, None).await? { | ||
| Some(token) => Ok(Identity::new( | ||
| Token::new(token.access_token.0.clone(), Some(token.expires_at.into())), | ||
| Some(token.expires_at.into()), | ||
|
|
@@ -618,7 +660,7 @@ pub async fn is_idc_user(database: &Database) -> Result<bool> { | |
| if cfg!(test) { | ||
| return Ok(false); | ||
| } | ||
| if let Ok(Some(token)) = BuilderIdToken::load(database).await { | ||
| if let Ok(Some(token)) = BuilderIdToken::load(database, None).await { | ||
| Ok(token.token_type() == TokenType::IamIdentityCenter) | ||
| } else { | ||
| Err(eyre!("No auth token found - is the user signed in?")) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this need to be an Option type? Do we not want to always emit something when token refresh fails?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Its because of the load function. There are cases when we can pass None to it for telemetry because its not a login failure case. And hence that gets passed down. So we just implemented it to be able to handle a None for telemetry.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh alright. I'll let the second PR chime in here. Not familiar enough with the auth flow to have more insights here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is more like a code arrangement thing now. Either I handle the None in one place vs the other.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
imo, instead of making this an Option, you can just check if there's an error and only then publish the auth-failed?
Also, do you wanna publish an auth-succeeded metric?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So the issue here is that load is being called in a-lot of places which may not necessarily need telemetry publish. So instead of adding the code everywhere to have telemetry to pass, I am allowing a None. We are already only posting auth_failed only when we have an error.
We already have a login successful metric. So that covers it.