11import { publish } from '@/events/lib/hydro'
22import { hydroNames } from '@/events/lib/schema'
3+ import { createLogger } from '@/observability/logger'
4+
5+ const logger = createLogger ( import . meta. url )
36
47/**
58 * Handles search analytics and client_name validation for external requests
@@ -40,20 +43,29 @@ export async function handleExternalSearchAnalytics(
4043 else if ( normalizedHost . endsWith ( '.github.net' ) || normalizedHost . endsWith ( '.githubapp.com' ) ) {
4144 return null
4245 }
43- // For localhost development without client_name, we'll still send analytics below
4446 }
4547
4648 // For localhost, ensure we have a client_name for analytics
4749 if ( normalizedHost === 'localhost' && ! client_name ) {
4850 client_name = 'localhost'
4951 }
5052
53+ // Log when we detect an external request that we will send analytics for
54+ if ( client_name && client_name !== 'docs.github.com-client' ) {
55+ logger . info ( 'External search analytics: Sending analytics for external client' , {
56+ client_name,
57+ searchContext,
58+ isLikelyExternalAPI,
59+ normalizedHost,
60+ userAgent : sanitizeUserAgent ( req . headers [ 'user-agent' ] ) ,
61+ } )
62+ }
63+
5164 // Send search event with client identifier
5265 try {
53- await publish ( {
66+ const analyticsPayload = {
5467 schema : hydroNames . search ,
5568 value : {
56- type : 'search' ,
5769 version : '1.0.0' ,
5870 context : {
5971 event_id : crypto . randomUUID ( ) ,
@@ -73,7 +85,9 @@ export async function handleExternalSearchAnalytics(
7385 search_context : searchContext ,
7486 search_client : client_name as string ,
7587 } ,
76- } )
88+ }
89+
90+ await publish ( analyticsPayload )
7791 } catch ( error ) {
7892 // Don't fail the request if analytics fails
7993 console . error ( 'Failed to send search analytics:' , error )
@@ -82,6 +96,34 @@ export async function handleExternalSearchAnalytics(
8296 return null
8397}
8498
/**
 * Ordered table mapping User-Agent patterns to coarse client labels.
 *
 * Order matters: the first matching entry wins, so the generic `Mozilla`
 * (browser) pattern is listed last and only applies when no more specific
 * HTTP-client pattern matched. `curl` and `wget` are anchored to the start of
 * the string; the remaining patterns match anywhere. All patterns are
 * case-insensitive and none use the `g`/`y` flags, so `test()` keeps no state
 * between calls and the table can safely be shared.
 */
const USER_AGENT_PATTERNS: ReadonlyArray<{ regex: RegExp; name: string }> = [
  { regex: /^curl/i, name: 'curl' },
  { regex: /^wget/i, name: 'wget' },
  { regex: /python-requests/i, name: 'python-requests' },
  { regex: /axios/i, name: 'axios' },
  { regex: /node-fetch/i, name: 'node-fetch' },
  { regex: /Go-http-client/i, name: 'go-http-client' },
  { regex: /okhttp/i, name: 'okhttp' },
  { regex: /Mozilla/i, name: 'browser' },
]

/**
 * Sanitizes a User-Agent header by reducing it to the main client type.
 *
 * Version numbers and any other detail in the raw header are dropped; only one
 * of a fixed set of labels is ever returned, so the result is safe to log.
 *
 * @param userAgent - Raw User-Agent header value, or `undefined` when absent.
 * @returns `'unknown'` when the value is missing or empty, the label of the
 *   first matching known client otherwise, or `'other'` when nothing matches.
 */
function sanitizeUserAgent(userAgent: string | undefined): string {
  // A missing header and an empty header are treated the same way.
  if (!userAgent) return 'unknown'

  const match = USER_AGENT_PATTERNS.find(({ regex }) => regex.test(userAgent))
  return match?.name ?? 'other'
}

126+
85127/**
86128 * Determines if a host should bypass client_name requirement for analytics
87129 * Returns true if the host ends with github.net or githubapp.com
0 commit comments