diff --git a/src/routes/activity.ts b/src/routes/activity.ts
new file mode 100644
index 0000000..be8eec4
--- /dev/null
+++ b/src/routes/activity.ts
@@ -0,0 +1,273 @@
+import type { FastifyPluginAsyncZod } from 'fastify-type-provider-zod'
+import { z } from 'zod'
+import { authMiddleware } from '@/middlewares/auth.middleware'
+import { ActivityService } from '@/services/activity.service'
+import {
+  createResponseSchema,
+  ErrorCode,
+  errorResponse,
+  errorResponseSchema,
+  successResponse,
+} from '@/types/response'
+
+// Schema for request parameters
+const repoParamsSchema = z.object({
+  owner: z.string(),
+  repo: z.string(),
+})
+
+// Query parameters shared by the pulls and issues routes (per_page must be a whole number)
+const activityQuerySchema = z.object({
+  state: z.enum(['open', 'closed', 'all']).optional().default('all'),
+  per_page: z.coerce.number().int().min(1).max(100).optional().default(30),
+})
+
+const commitsQuerySchema = z.object({
+  per_page: z.coerce.number().int().min(1).max(100).optional().default(30),
+})
+
+// Activity timeline schemas
+const commitActivitySchema = z.object({
+  sha: z.string(),
+  message: z.string(),
+  author: z.object({
+    name: z.string(),
+    email: z.string(),
+    date: z.string(),
+    avatar: z.string().optional(),
+  }),
+  committer: z.object({
+    name: z.string(),
+    date: z.string(),
+  }),
+  url: z.string(),
+})
+
+const pullRequestActivitySchema = z.object({
+  number: z.number(),
+  title: z.string(),
+  state: z.string(),
+  user: z.object({
+    login: z.string(),
+    avatar_url: z.string(),
+  }),
+  created_at: z.string(),
+  updated_at: z.string(),
+  merged_at: z.string().nullable(),
+  html_url: z.string(),
+})
+
+const issueActivitySchema = z.object({
+  number: z.number(),
+  title: z.string(),
+  state: z.string(),
+  user: z.object({
+    login: z.string(),
+    avatar_url: z.string(),
+  }),
+  created_at: z.string(),
+  updated_at: z.string(),
+  closed_at: z.string().nullable(),
+  html_url: z.string(),
+  labels: z
+    .object({
+      name: z.string(),
+      color: z.string(),
+    })
+    .array(),
+})
+
+const commitActivityStatsSchema = z.object({
+  days: z.number().array(),
+  total: z.number(),
+  week: z.number(),
+})
+
+const contributorStatsSchema = z.object({
+  author: z.object({
+    login: z.string(),
+    avatar_url: z.string(),
+  }),
+  total: z.number(),
+  weeks: z
+    .object({
+      w: z.number(),
+      a: z.number(),
+      d: z.number(),
+      c: z.number(),
+    })
+    .array(),
+})
+
+// Response schemas
+const commitsResponseSchema = createResponseSchema(commitActivitySchema.array())
+const pullRequestsResponseSchema = createResponseSchema(pullRequestActivitySchema.array())
+const issuesResponseSchema = createResponseSchema(issueActivitySchema.array())
+const commitActivityStatsResponseSchema = createResponseSchema(commitActivityStatsSchema.array())
+const contributorStatsResponseSchema = createResponseSchema(contributorStatsSchema.array())
+
+/**
+ * Fastify plugin exposing the repository activity endpoints (commits, pull requests,
+ * issues, weekly commit stats, contributor stats).
+ *
+ * Every route runs `authMiddleware` first, builds an `ActivityService` from the
+ * authenticated user's token, and wraps the result with `successResponse`. Any failure
+ * is reported as HTTP 500 with `ErrorCode.GITHUB_API_ERROR`.
+ */
+export const activityRoutes: FastifyPluginAsyncZod = async (app) => {
+  // All routes in this plugin require authentication
+  app.addHook('preHandler', authMiddleware)
+
+  // Route to get recent commits
+  app.get(
+    '/:owner/:repo/commits',
+    {
+      schema: {
+        description: 'Get recent commits for activity timeline',
+        tags: ['activity'],
+        params: repoParamsSchema,
+        querystring: commitsQuerySchema,
+        response: {
+          200: commitsResponseSchema,
+          500: errorResponseSchema,
+        },
+      },
+    },
+    async (request, reply) => {
+      try {
+        const { owner, repo } = request.params
+        const { per_page } = request.query
+        const { accessToken, username } = request.user
+        const activityService = new ActivityService(accessToken, username)
+        const commits = await activityService.getRecentCommits(owner, repo, per_page)
+        return successResponse(commits)
+      } catch (err) {
+        const message = err instanceof Error ? err.message : ''
+        const errorMessage = message || 'Failed to fetch commits'
+        return reply.status(500).send(errorResponse(ErrorCode.GITHUB_API_ERROR, errorMessage))
+      }
+    }
+  )
+
+  // Route to get recent pull requests
+  app.get(
+    '/:owner/:repo/pulls',
+    {
+      schema: {
+        description: 'Get recent pull requests for activity timeline',
+        tags: ['activity'],
+        params: repoParamsSchema,
+        querystring: activityQuerySchema,
+        response: {
+          200: pullRequestsResponseSchema,
+          500: errorResponseSchema,
+        },
+      },
+    },
+    async (request, reply) => {
+      try {
+        const { owner, repo } = request.params
+        const { state, per_page } = request.query
+        const { accessToken, username } = request.user
+        const activityService = new ActivityService(accessToken, username)
+        const pulls = await activityService.getRecentPullRequests(owner, repo, state, per_page)
+        return successResponse(pulls)
+      } catch (err) {
+        const message = err instanceof Error ? err.message : ''
+        const errorMessage = message || 'Failed to fetch pull requests'
+        return reply.status(500).send(errorResponse(ErrorCode.GITHUB_API_ERROR, errorMessage))
+      }
+    }
+  )
+
+  // Route to get recent issues
+  app.get(
+    '/:owner/:repo/issues',
+    {
+      schema: {
+        description: 'Get recent issues for activity timeline (excludes PRs)',
+        tags: ['activity'],
+        params: repoParamsSchema,
+        querystring: activityQuerySchema,
+        response: {
+          200: issuesResponseSchema,
+          500: errorResponseSchema,
+        },
+      },
+    },
+    async (request, reply) => {
+      try {
+        const { owner, repo } = request.params
+        const { state, per_page } = request.query
+        const { accessToken, username } = request.user
+        const activityService = new ActivityService(accessToken, username)
+        const issues = await activityService.getRecentIssues(owner, repo, state, per_page)
+        return successResponse(issues)
+      } catch (err) {
+        const message = err instanceof Error ? err.message : ''
+        const errorMessage = message || 'Failed to fetch issues'
+        return reply.status(500).send(errorResponse(ErrorCode.GITHUB_API_ERROR, errorMessage))
+      }
+    }
+  )
+
+  // Route to get commit activity stats (heatmap data)
+  app.get(
+    '/:owner/:repo/commit-stats',
+    {
+      schema: {
+        description:
+          'Get commit activity statistics (weekly) for heatmap. ⚠️ Only for repos with <10k commits',
+        tags: ['activity'],
+        params: repoParamsSchema,
+        response: {
+          200: commitActivityStatsResponseSchema,
+          500: errorResponseSchema,
+        },
+      },
+    },
+    async (request, reply) => {
+      try {
+        const { owner, repo } = request.params
+        const { accessToken, username } = request.user
+        const activityService = new ActivityService(accessToken, username)
+        const stats = await activityService.getCommitActivityStats(owner, repo)
+        return successResponse(stats)
+      } catch (err) {
+        const message = err instanceof Error ? err.message : ''
+        const errorMessage = message || 'Failed to fetch commit activity stats'
+        return reply.status(500).send(errorResponse(ErrorCode.GITHUB_API_ERROR, errorMessage))
+      }
+    }
+  )
+
+  // Route to get contributor statistics
+  app.get(
+    '/:owner/:repo/contributors',
+    {
+      schema: {
+        description:
+          'Get contributor statistics with weekly breakdown. ⚠️ Only for repos with <10k commits',
+        tags: ['activity'],
+        params: repoParamsSchema,
+        response: {
+          200: contributorStatsResponseSchema,
+          500: errorResponseSchema,
+        },
+      },
+    },
+    async (request, reply) => {
+      try {
+        const { owner, repo } = request.params
+        const { accessToken, username } = request.user
+        const activityService = new ActivityService(accessToken, username)
+        const stats = await activityService.getContributorStats(owner, repo)
+        return successResponse(stats)
+      } catch (err) {
+        const message = err instanceof Error ? err.message : ''
+        const errorMessage = message || 'Failed to fetch contributor stats'
+        return reply.status(500).send(errorResponse(ErrorCode.GITHUB_API_ERROR, errorMessage))
+      }
+    }
+  )
+}
diff --git a/src/routes/code-stats.ts b/src/routes/code-stats.ts
new file mode 100644
index 0000000..b400bcb
--- /dev/null
+++ b/src/routes/code-stats.ts
@@ -0,0 +1,234 @@
+import type { FastifyPluginAsyncZod } from 'fastify-type-provider-zod'
+import { z } from 'zod'
+import { authMiddleware } from '@/middlewares/auth.middleware'
+import { CodeStatsService } from '@/services/code-stats.service'
+import {
+  createResponseSchema,
+  ErrorCode,
+  errorResponse,
+  errorResponseSchema,
+  successResponse,
+} from '@/types/response'
+
+// Schema for request parameters
+const repoParamsSchema = z.object({
+  owner: z.string(),
+  repo: z.string(),
+})
+
+// Query schemas. `recursive` is parsed from its text: z.coerce.boolean() would read "false" as true
+const fileTreeQuerySchema = z.object({
+  branch: z.string().optional().default('main'),
+  recursive: z.enum(['true', 'false']).optional().default('true').transform((v) => v === 'true'),
+})
+
+const hotFilesQuerySchema = z.object({
+  limit: z.coerce.number().int().min(10).max(200).optional().default(100),
+  top: z.coerce.number().int().min(5).max(50).optional().default(20),
+})
+
+const fileStructureQuerySchema = z.object({
+  branch: z.string().optional().default('main'),
+})
+
+// Response schemas
+const languageStatsSchema = z.record(z.string(), z.number())
+
+const codeFrequencySchema = z.object({
+  week: z.number(),
+  additions: z.number(),
+  deletions: z.number(),
+})
+
+const fileTreeSchema = z.object({
+  path: z.string(),
+  mode: z.string(),
+  type: z.enum(['blob', 'tree']),
+  sha: z.string(),
+  size: z.number().optional(),
+  url: z.string().optional(),
+})
+
+const hotFileSchema = z.object({
+  path: z.string(),
+  changeCount: z.number(),
+  lastModified: z.string(),
+  authors: z.string().array(),
+})
+
+const fileStructureSummarySchema = z.object({
+  totalFiles: z.number(),
+  totalDirectories: z.number(),
+  totalSize: z.number(),
+  filesByExtension: z.record(z.string(), z.number()),
+  largestFiles: z
+    .object({
+      path: z.string(),
+      size: z.number(),
+    })
+    .array(),
+})
+
+// Response schemas with unified format
+const languageStatsResponseSchema = createResponseSchema(languageStatsSchema)
+const codeFrequencyResponseSchema = createResponseSchema(codeFrequencySchema.array())
+const fileTreeResponseSchema = createResponseSchema(fileTreeSchema.array())
+const hotFilesResponseSchema = createResponseSchema(hotFileSchema.array())
+const fileStructureSummaryResponseSchema = createResponseSchema(fileStructureSummarySchema)
+
+export const codeStatsRoutes: FastifyPluginAsyncZod = async (app) => {
+  // All routes in this plugin require authentication
+  app.addHook('preHandler', authMiddleware)
+
+  // Route to get language distribution
+  app.get(
+    '/:owner/:repo/languages',
+    {
+      schema: {
+        description: 'Get programming language distribution (in bytes)',
+        tags: ['code-stats'],
+        params: repoParamsSchema,
+        response: {
+          200: languageStatsResponseSchema,
+          500: errorResponseSchema,
+        },
+      },
+    },
+    async (request, reply) => {
+      try {
+        const { owner, repo } = request.params
+        const { accessToken, username } = request.user
+        const codeStatsService = new CodeStatsService(accessToken, username)
+        const languages = await codeStatsService.getLanguageStats(owner, repo)
+        return successResponse(languages)
+      } catch (err) {
+        const message = err instanceof Error ? err.message : ''
+        const errorMessage = message || 'Failed to fetch language statistics'
+        return reply.status(500).send(errorResponse(ErrorCode.GITHUB_API_ERROR, errorMessage))
+      }
+    }
+  )
+
+  // Route to get code frequency (weekly additions/deletions)
+  app.get(
+    '/:owner/:repo/code-frequency',
+    {
+      schema: {
+        description:
+          'Get code frequency statistics (weekly additions/deletions). ⚠️ Only for repos with <10k commits',
+        tags: ['code-stats'],
+        params: repoParamsSchema,
+        response: {
+          200: codeFrequencyResponseSchema,
+          500: errorResponseSchema,
+        },
+      },
+    },
+    async (request, reply) => {
+      try {
+        const { owner, repo } = request.params
+        const { accessToken, username } = request.user
+        const codeStatsService = new CodeStatsService(accessToken, username)
+        const frequency = await codeStatsService.getCodeFrequency(owner, repo)
+        return successResponse(frequency)
+      } catch (err) {
+        const message = err instanceof Error ? err.message : ''
+        const errorMessage = message || 'Failed to fetch code frequency'
+        return reply.status(500).send(errorResponse(ErrorCode.GITHUB_API_ERROR, errorMessage))
+      }
+    }
+  )
+
+  // Route to get file tree structure
+  app.get(
+    '/:owner/:repo/file-tree',
+    {
+      schema: {
+        description: 'Get repository file tree structure',
+        tags: ['code-stats'],
+        params: repoParamsSchema,
+        querystring: fileTreeQuerySchema,
+        response: {
+          200: fileTreeResponseSchema,
+          500: errorResponseSchema,
+        },
+      },
+    },
+    async (request, reply) => {
+      try {
+        const { owner, repo } = request.params
+        const { branch, recursive } = request.query
+        const { accessToken, username } = request.user
+        const codeStatsService = new CodeStatsService(accessToken, username)
+        const fileTree = await codeStatsService.getFileTree(owner, repo, branch, recursive)
+        return successResponse(fileTree)
+      } catch (err) {
+        const message = err instanceof Error ? err.message : ''
+        const errorMessage = message || 'Failed to fetch file tree'
+        return reply.status(500).send(errorResponse(ErrorCode.GITHUB_API_ERROR, errorMessage))
+      }
+    }
+  )
+
+  // Route to get hot files (most frequently modified)
+  app.get(
+    '/:owner/:repo/hot-files',
+    {
+      schema: {
+        description: 'Get hot files (most frequently modified files) by analyzing recent commits',
+        tags: ['code-stats'],
+        params: repoParamsSchema,
+        querystring: hotFilesQuerySchema,
+        response: {
+          200: hotFilesResponseSchema,
+          500: errorResponseSchema,
+        },
+      },
+    },
+    async (request, reply) => {
+      try {
+        const { owner, repo } = request.params
+        const { limit, top } = request.query
+        const { accessToken, username } = request.user
+        const codeStatsService = new CodeStatsService(accessToken, username)
+        const hotFiles = await codeStatsService.getHotFiles(owner, repo, limit, top)
+        return successResponse(hotFiles)
+      } catch (err) {
+        const message = err instanceof Error ? err.message : ''
+        const errorMessage = message || 'Failed to analyze hot files'
+        return reply.status(500).send(errorResponse(ErrorCode.GITHUB_API_ERROR, errorMessage))
+      }
+    }
+  )
+
+  // Route to get file structure summary
+  app.get(
+    '/:owner/:repo/file-structure-summary',
+    {
+      schema: {
+        description: 'Get file structure summary (counts by type, largest files, etc.)',
+        tags: ['code-stats'],
+        params: repoParamsSchema,
+        querystring: fileStructureQuerySchema,
+        response: {
+          200: fileStructureSummaryResponseSchema,
+          500: errorResponseSchema,
+        },
+      },
+    },
+    async (request, reply) => {
+      try {
+        const { owner, repo } = request.params
+        const { branch } = request.query
+        const { accessToken, username } = request.user
+        const codeStatsService = new CodeStatsService(accessToken, username)
+        const summary = await codeStatsService.getFileStructureSummary(owner, repo, branch)
+        return successResponse(summary)
+      } catch (err) {
+        const message = err instanceof Error ? err.message : ''
+        const errorMessage = message || 'Failed to fetch file structure summary'
+        return reply.status(500).send(errorResponse(ErrorCode.GITHUB_API_ERROR, errorMessage))
+      }
+    }
+  )
+}
diff --git a/src/routes/repos.ts b/src/routes/repos.ts
index 33a9c0a..c06312d 100644
--- a/src/routes/repos.ts
+++ b/src/routes/repos.ts
@@ -46,7 +46,7 @@ const repoOverviewSchema = z.object({
   size: z.number(),
   language: z.string().nullable(),
   languages: z.record(z.string(), z.number()).nullable(),
-  topics: z.array(z.string()),
+  topics: z.string().array(),
   licenseName: z.string().nullable(),
   licenseKey: z.string().nullable(),
   githubCreatedAt: z.date(),
@@ -81,7 +81,7 @@ const repoListItemSchema = z.object({
   }),
 })
 
-const reposListSchema = z.array(repoListItemSchema)
+const reposListSchema = repoListItemSchema.array()
 
 // Use the unified response format
 const reposListResponseSchema = createResponseSchema(reposListSchema)
diff --git a/src/server.ts b/src/server.ts
index 3bc65e1..45e1696 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -11,6 +11,8 @@ import 'dotenv/config'
 
 import { authRoutes } from '@/routes/auth'
 import { repoRoutes } from '@/routes/repos'
+import { activityRoutes } from '@/routes/activity'
+import { codeStatsRoutes } from '@/routes/code-stats'
 import { statsRoutes } from '@/routes/stats'
 import { webhookRoutes } from '@/routes/webhooks'
 import { ErrorCode, errorResponse } from '@/types/response'
@@ -117,6 +119,14 @@ GitHub API rate limits apply. Authenticated requests: 5,000/hour.
           name: 'repos',
           description: 'Repository management and statistics',
         },
+        {
+          name: 'activity',
+          description: 'Repository activity timeline (commits, PRs, issues)',
+        },
+        {
+          name: 'code-stats',
+          description: 'Code statistics and analysis (languages, file structure, hot files)',
+        },
         {
           name: 'stats',
           description: 'User statistics and analytics',
@@ -175,6 +185,8 @@ GitHub API rate limits apply. Authenticated requests: 5,000/hour.
// Register routes await app.register(authRoutes, { prefix: '/auth' }) await app.register(repoRoutes, { prefix: '/repos' }) + await app.register(activityRoutes, { prefix: '/activity' }) + await app.register(codeStatsRoutes, { prefix: '/code-stats' }) await app.register(statsRoutes, { prefix: '/stats' }) await app.register(webhookRoutes, { prefix: '/webhooks' }) diff --git a/src/services/activity.service.ts b/src/services/activity.service.ts new file mode 100644 index 0000000..d9041a4 --- /dev/null +++ b/src/services/activity.service.ts @@ -0,0 +1,351 @@ +import { Octokit } from '@octokit/rest' +import { redis } from '@/lib/redis' + +// Activity timeline types +export interface CommitActivity { + sha: string + message: string + author: { + name: string + email: string + date: string + avatar?: string + } + committer: { + name: string + date: string + } + url: string +} + +export interface PullRequestActivity { + number: number + title: string + state: string + user: { + login: string + avatar_url: string + } + created_at: string + updated_at: string + merged_at: string | null + html_url: string +} + +export interface IssueActivity { + number: number + title: string + state: string + user: { + login: string + avatar_url: string + } + created_at: string + updated_at: string + closed_at: string | null + html_url: string + labels: { + name: string + color: string + }[] +} + +export interface CommitActivityStats { + days: number[] // 7 days, 0 = Sunday + total: number + week: number // Unix timestamp +} + +export interface ContributorStats { + author: { + login: string + avatar_url: string + } + total: number + weeks: { + w: number // Week timestamp + a: number // Additions + d: number // Deletions + c: number // Commits + }[] +} + +export class ActivityService { + private octokit: Octokit + + constructor(accessToken: string, _username: string) { + this.octokit = new Octokit({ auth: accessToken }) + } + + /** + * Get recent commits for activity timeline + */ + async 
getRecentCommits(owner: string, repo: string, per_page = 30): Promise { + const cacheKey = `repo:commits:${owner}/${repo}:${String(per_page)}` + const CACHE_TTL = 300 // 5 minutes + + try { + const cached = await redis.get(cacheKey) + if (cached) { + return JSON.parse(cached as string) as CommitActivity[] + } + } catch (error) { + console.warn('Redis cache unavailable:', error) + } + + const { data } = await this.octokit.repos.listCommits({ + owner, + repo, + per_page, + }) + + const commits: CommitActivity[] = data.map((commit) => ({ + sha: commit.sha, + message: commit.commit.message, + author: { + name: commit.commit.author?.name ?? 'Unknown', + email: commit.commit.author?.email ?? '', + date: commit.commit.author?.date ?? '', + avatar: commit.author?.avatar_url, + }, + committer: { + name: commit.commit.committer?.name ?? 'Unknown', + date: commit.commit.committer?.date ?? '', + }, + url: commit.html_url, + })) + + try { + await redis.set(cacheKey, JSON.stringify(commits), { ex: CACHE_TTL }) + } catch (error) { + console.warn('Failed to cache commits:', error) + } + + return commits + } + + /** + * Get recent pull requests for activity timeline + */ + async getRecentPullRequests( + owner: string, + repo: string, + state: 'open' | 'closed' | 'all' = 'all', + per_page = 30 + ): Promise { + const cacheKey = `repo:prs:${owner}/${repo}:${state}:${String(per_page)}` + const CACHE_TTL = 300 // 5 minutes + + try { + const cached = await redis.get(cacheKey) + if (cached) { + return JSON.parse(cached as string) as PullRequestActivity[] + } + } catch (error) { + console.warn('Redis cache unavailable:', error) + } + + const { data } = await this.octokit.pulls.list({ + owner, + repo, + state, + per_page, + sort: 'updated', + direction: 'desc', + }) + + const pullRequests: PullRequestActivity[] = data.map((pr) => ({ + number: pr.number, + title: pr.title, + state: pr.state, + user: { + login: pr.user?.login ?? 'Unknown', + avatar_url: pr.user?.avatar_url ?? 
'', + }, + created_at: pr.created_at, + updated_at: pr.updated_at, + merged_at: pr.merged_at, + html_url: pr.html_url, + })) + + try { + await redis.set(cacheKey, JSON.stringify(pullRequests), { ex: CACHE_TTL }) + } catch (error) { + console.warn('Failed to cache pull requests:', error) + } + + return pullRequests + } + + /** + * Get recent issues for activity timeline + */ + async getRecentIssues( + owner: string, + repo: string, + state: 'open' | 'closed' | 'all' = 'all', + per_page = 30 + ): Promise { + const cacheKey = `repo:issues:${owner}/${repo}:${state}:${String(per_page)}` + const CACHE_TTL = 300 // 5 minutes + + try { + const cached = await redis.get(cacheKey) + if (cached) { + return JSON.parse(cached as string) as IssueActivity[] + } + } catch (error) { + console.warn('Redis cache unavailable:', error) + } + + const { data } = await this.octokit.issues.listForRepo({ + owner, + repo, + state, + per_page, + sort: 'updated', + direction: 'desc', + }) + + // Filter out pull requests (GitHub API returns both issues and PRs) + const issues: IssueActivity[] = data + .filter((issue) => !issue.pull_request) + .map((issue) => ({ + number: issue.number, + title: issue.title, + state: issue.state, + user: { + login: issue.user?.login ?? 'Unknown', + avatar_url: issue.user?.avatar_url ?? '', + }, + created_at: issue.created_at, + updated_at: issue.updated_at, + closed_at: issue.closed_at, + html_url: issue.html_url, + labels: issue.labels.map((label) => ({ + name: typeof label === 'string' ? label : (label.name ?? ''), + color: typeof label === 'string' ? '' : (label.color ?? 
''), + })), + })) + + try { + await redis.set(cacheKey, JSON.stringify(issues), { ex: CACHE_TTL }) + } catch (error) { + console.warn('Failed to cache issues:', error) + } + + return issues + } + + /** + * Get commit activity statistics (weekly commit activity) + * ⚠️ Only available for repositories with < 10,000 commits + * ⚠️ First request returns 202, need to retry + */ + async getCommitActivityStats(owner: string, repo: string): Promise { + const cacheKey = `repo:commit-activity:${owner}/${repo}` + const CACHE_TTL = 3600 // 1 hour (GitHub caches this data) + + try { + const cached = await redis.get(cacheKey) + if (cached) { + return JSON.parse(cached as string) as CommitActivityStats[] + } + } catch (error) { + console.warn('Redis cache unavailable:', error) + } + + // GitHub returns 202 on first request, need to retry + const maxRetries = 3 + for (let retry = 0; retry < maxRetries; retry++) { + try { + const response = await this.octokit.repos.getCommitActivityStats({ owner, repo }) + + if (response.status === 200 && response.data.length > 0) { + const stats = response.data as CommitActivityStats[] + + try { + await redis.set(cacheKey, JSON.stringify(stats), { ex: CACHE_TTL }) + } catch (error) { + console.warn('Failed to cache commit activity stats:', error) + } + + return stats + } + + // If 202, wait and retry + if (response.status === 202 && retry < maxRetries - 1) { + await new Promise((resolve) => setTimeout(resolve, 2000)) + continue + } + } catch (error) { + console.error('Error fetching commit activity stats:', error) + throw error + } + } + + throw new Error('Failed to fetch commit activity stats after retries') + } + + /** + * Get contributor statistics + * ⚠️ Only available for repositories with < 10,000 commits + * ⚠️ First request returns 202, need to retry + */ + async getContributorStats(owner: string, repo: string): Promise { + const cacheKey = `repo:contributor-stats:${owner}/${repo}` + const CACHE_TTL = 3600 // 1 hour (GitHub caches this 
data) + + try { + const cached = await redis.get(cacheKey) + if (cached) { + return JSON.parse(cached as string) as ContributorStats[] + } + } catch (error) { + console.warn('Redis cache unavailable:', error) + } + + // GitHub returns 202 on first request, need to retry + const maxRetries = 3 + for (let retry = 0; retry < maxRetries; retry++) { + try { + const response = await this.octokit.repos.getContributorsStats({ owner, repo }) + + if (response.status === 200 && response.data.length > 0) { + const stats: ContributorStats[] = response.data.map((contributor) => ({ + author: { + login: contributor.author?.login ?? 'Unknown', + avatar_url: contributor.author?.avatar_url ?? '', + }, + total: contributor.total, + weeks: contributor.weeks.map((week) => ({ + w: week.w ?? 0, + a: week.a ?? 0, + d: week.d ?? 0, + c: week.c ?? 0, + })), + })) + + try { + await redis.set(cacheKey, JSON.stringify(stats), { ex: CACHE_TTL }) + } catch (error) { + console.warn('Failed to cache contributor stats:', error) + } + + return stats + } + + // If 202, wait and retry + if (response.status === 202 && retry < maxRetries - 1) { + await new Promise((resolve) => setTimeout(resolve, 2000)) + continue + } + } catch (error) { + console.error('Error fetching contributor stats:', error) + throw error + } + } + + throw new Error('Failed to fetch contributor stats after retries') + } +} diff --git a/src/services/code-stats.service.ts b/src/services/code-stats.service.ts new file mode 100644 index 0000000..9368a09 --- /dev/null +++ b/src/services/code-stats.service.ts @@ -0,0 +1,365 @@ +import { Octokit } from '@octokit/rest' +import { redis } from '@/lib/redis' + +// Language distribution +export type LanguageStats = Record // bytes of code + +// Code frequency (weekly additions/deletions) +export interface CodeFrequency { + week: number // Unix timestamp + additions: number + deletions: number +} + +// File tree structure +export interface FileTree { + path: string + mode: string + type: 'blob' 
| 'tree' + sha: string + size?: number + url?: string +} + +// Hot files (most frequently modified) +export interface HotFile { + path: string + changeCount: number + lastModified: string + authors: string[] +} + +export class CodeStatsService { + private octokit: Octokit + + constructor(accessToken: string, _username: string) { + this.octokit = new Octokit({ auth: accessToken }) + } + + /** + * Get language distribution (in bytes) + * Returns languages used in the repository with byte counts + */ + async getLanguageStats(owner: string, repo: string): Promise { + const cacheKey = `repo:languages:${owner}/${repo}` + const CACHE_TTL = 3600 // 1 hour + + try { + const cached = await redis.get(cacheKey) + if (cached) { + return JSON.parse(cached as string) as LanguageStats + } + } catch (error) { + console.warn('Redis cache unavailable:', error) + } + + const { data } = await this.octokit.repos.listLanguages({ + owner, + repo, + }) + + try { + await redis.set(cacheKey, JSON.stringify(data), { ex: CACHE_TTL }) + } catch (error) { + console.warn('Failed to cache language stats:', error) + } + + return data + } + + /** + * Get code frequency statistics (weekly additions/deletions) + * ⚠️ Only available for repositories with < 10,000 commits + * ⚠️ First request returns 202, need to retry + */ + async getCodeFrequency(owner: string, repo: string): Promise { + const cacheKey = `repo:code-frequency:${owner}/${repo}` + const CACHE_TTL = 3600 // 1 hour + + try { + const cached = await redis.get(cacheKey) + if (cached) { + return JSON.parse(cached as string) as CodeFrequency[] + } + } catch (error) { + console.warn('Redis cache unavailable:', error) + } + + const response = await this.fetchCodeFrequencyWithRetry(owner, repo) + + const stats: CodeFrequency[] = response.data.map((item) => ({ + week: item[0] ?? 0, + additions: item[1] ?? 0, + deletions: Math.abs(item[2] ?? 
0), + })) + + try { + await redis.set(cacheKey, JSON.stringify(stats), { ex: CACHE_TTL }) + } catch (error) { + console.warn('Failed to cache code frequency stats:', error) + } + + return stats + } + + /** + * Fetch code frequency with retry logic (recursive) + */ + private async fetchCodeFrequencyWithRetry( + owner: string, + repo: string, + attempt = 1 + ): Promise<{ data: number[][] }> { + const maxAttempts = 3 + const response = await this.octokit.repos.getCodeFrequencyStats({ owner, repo }) + + if (response.status === 200 && response.data.length > 0) { + return { data: response.data } + } + + if (response.status === 202 && attempt < maxAttempts) { + await new Promise((resolve) => setTimeout(resolve, 2000)) + return this.fetchCodeFrequencyWithRetry(owner, repo, attempt + 1) + } + + throw new Error(`Failed to fetch code frequency after ${String(attempt)} attempts`) + } + + /** + * Get repository file tree structure + * @param recursive If true, get all files recursively + */ + async getFileTree( + owner: string, + repo: string, + branch = 'main', + recursive = true + ): Promise { + const cacheKey = `repo:tree:${owner}/${repo}:${branch}:${String(recursive)}` + const CACHE_TTL = 1800 // 30 minutes + + try { + const cached = await redis.get(cacheKey) + if (cached) { + return JSON.parse(cached as string) as FileTree[] + } + } catch (error) { + console.warn('Redis cache unavailable:', error) + } + + // First, get the branch to find the tree SHA + const { data: branchData } = await this.octokit.repos.getBranch({ + owner, + repo, + branch, + }) + + const treeSha = branchData.commit.commit.tree.sha + + // Then get the tree + const { data } = await this.octokit.git.getTree({ + owner, + repo, + tree_sha: treeSha, + recursive: recursive ? 
'true' : undefined, + }) + + const fileTree: FileTree[] = data.tree.map((item) => ({ + path: item.path, + mode: item.mode, + type: item.type as 'blob' | 'tree', + sha: item.sha, + size: item.size, + url: item.url, + })) + + try { + await redis.set(cacheKey, JSON.stringify(fileTree), { ex: CACHE_TTL }) + } catch (error) { + console.warn('Failed to cache file tree:', error) + } + + return fileTree + } + + /** + * Analyze hot files (most frequently modified files) + * This analyzes recent commits to find which files are changed most often + * @param limit Number of commits to analyze (default: 100) + * @param topN Return top N hot files (default: 20) + */ + async getHotFiles(owner: string, repo: string, limit = 100, topN = 20): Promise { + const cacheKey = `repo:hot-files:${owner}/${repo}:${String(limit)}:${String(topN)}` + const CACHE_TTL = 1800 // 30 minutes + + try { + const cached = await redis.get(cacheKey) + if (cached) { + return JSON.parse(cached as string) as HotFile[] + } + } catch (error) { + console.warn('Redis cache unavailable:', error) + } + + // Get recent commits + const { data: commits } = await this.octokit.repos.listCommits({ + owner, + repo, + per_page: Math.min(limit, 100), + }) + + // Track file changes + const fileChanges = new Map< + string, + { + count: number + lastModified: string + authors: Set + } + >() + + // Process commits + await this.processCommitsForHotFiles(owner, repo, commits, fileChanges) + + // Convert to array and sort + const hotFiles = this.sortAndLimitHotFiles(fileChanges, topN) + + try { + await redis.set(cacheKey, JSON.stringify(hotFiles), { ex: CACHE_TTL }) + } catch (error) { + console.warn('Failed to cache hot files:', error) + } + + return hotFiles + } + + /** + * Process commits to track file changes + */ + private async processCommitsForHotFiles( + owner: string, + repo: string, + commits: { sha: string }[], + fileChanges: Map }> + ) { + const commitPromises = commits.map(async (commit) => + this.octokit.repos + 
.getCommit({ + owner, + repo, + ref: commit.sha, + }) + .then((result) => result.data) + .catch((error: unknown) => { + console.warn(`Failed to fetch commit ${commit.sha}:`, error) + return null + }) + ) + + const commitDetails = await Promise.all(commitPromises) + + for (const commitDetail of commitDetails) { + if (!commitDetail) continue + + const author = commitDetail.commit.author?.name ?? 'Unknown' + const date = commitDetail.commit.author?.date ?? new Date().toISOString() + + if (commitDetail.files) { + this.updateFileChanges(commitDetail.files, fileChanges, author, date) + } + } + } + + /** + * Update file changes tracking + */ + private updateFileChanges( + files: { filename: string }[], + fileChanges: Map }>, + author: string, + date: string + ) { + for (const file of files) { + const path = file.filename + + if (!fileChanges.has(path)) { + fileChanges.set(path, { + count: 0, + lastModified: date, + authors: new Set(), + }) + } + + const fileData = fileChanges.get(path) + if (fileData) { + fileData.count++ + fileData.authors.add(author) + + // Update last modified if this commit is more recent + if (new Date(date) > new Date(fileData.lastModified)) { + fileData.lastModified = date + } + } + } + } + + /** + * Sort and limit hot files + */ + private sortAndLimitHotFiles( + fileChanges: Map }>, + topN: number + ): HotFile[] { + return Array.from(fileChanges.entries()) + .map(([path, data]) => ({ + path, + changeCount: data.count, + lastModified: data.lastModified, + authors: Array.from(data.authors), + })) + .sort((a, b) => b.changeCount - a.changeCount) + .slice(0, topN) + } + + /** + * Get file structure summary (counts by type) + */ + async getFileStructureSummary(owner: string, repo: string, branch = 'main') { + const files = await this.getFileTree(owner, repo, branch, true) + + const summary = { + totalFiles: 0, + totalDirectories: 0, + totalSize: 0, + filesByExtension: new Map(), + largestFiles: [] as { path: string; size: number }[], + } + + for (const 
file of files) { + if (file.type === 'blob') { + summary.totalFiles++ + summary.totalSize += file.size ?? 0 + + // Track by extension + const ext = file.path.split('.').pop() ?? 'no-extension' + const currentCount = summary.filesByExtension.get(ext) ?? 0 + summary.filesByExtension.set(ext, currentCount + 1) + } else { + summary.totalDirectories++ + } + } + + // Get top 10 largest files + const largestFiles = files + .filter((f) => f.type === 'blob' && f.size) + .sort((a, b) => (b.size ?? 0) - (a.size ?? 0)) + .slice(0, 10) + .map((f) => ({ path: f.path, size: f.size ?? 0 })) + + return { + ...summary, + filesByExtension: Object.fromEntries(summary.filesByExtension), + largestFiles, + } + } +}