From 279e18e781d2c4813bd7495e77a5cf0f7e28e852 Mon Sep 17 00:00:00 2001
From: Maximilian Walz
Date: Wed, 16 Apr 2025 16:22:59 +0200
Subject: [PATCH] Add readme and first script

---
 ContextExtractor.ps1 | 179 +++++++++++++++++++++++++++++++++++++++++++
 README.md            |  12 +++
 2 files changed, 191 insertions(+)
 create mode 100644 ContextExtractor.ps1
 create mode 100644 README.md

diff --git a/ContextExtractor.ps1 b/ContextExtractor.ps1
new file mode 100644
index 0000000..78952a5
--- /dev/null
+++ b/ContextExtractor.ps1
@@ -0,0 +1,179 @@
+<#
+.SYNOPSIS
+    Collects project source files into a single output.txt for use as LLM context.
+
+.DESCRIPTION
+    Run from the project root. Gathers every .java file below src\main, the default
+    build/configuration files, and any additionally requested extensions or files,
+    then writes them to output.txt, each prefixed with a "FILE: <relative path>" header.
+
+.PARAMETER AdditionalExtensions
+    Extra file extensions (with or without the leading dot) to collect from the whole
+    project tree. Files inside test folders or packages are skipped.
+
+.PARAMETER AdditionalFiles
+    Extra files to include, relative to the current directory or absolute.
+
+.PARAMETER KeepLineBreaks
+    Keep the original line breaks instead of collapsing all whitespace to single spaces.
+
+.EXAMPLE
+    .\ContextExtractor.ps1 -AdditionalFiles "src\main\resources\static\index.html"
+#>
+param (
+    [Parameter(Mandatory=$false)]
+    [string[]]$AdditionalExtensions,
+
+    [Parameter(Mandatory=$false)]
+    [string[]]$AdditionalFiles,
+
+    [Parameter(Mandatory=$false)]
+    [switch]$KeepLineBreaks
+)
+
+# Use the current directory as the root directory. Keep it as a plain file-system
+# path string so it can be compared against the FullName of the files found below.
+$DirectoryPath = (Get-Location).ProviderPath
+
+# Default files to include
+$defaultFiles = @(
+    "build.gradle",
+    "application.properties"
+)
+
+# Returns $true when the path contains a "test" folder or a ".test." segment.
+function Is-TestFile {
+    param (
+        [string]$FilePath
+    )
+
+    # Accept both "\" and "/" so the check does not depend on the path separator
+    return ($FilePath -match "[\\/]test[\\/]") -or ($FilePath -match "\.test\.")
+}
+
+# Returns "FILE: <relative path> <content> " for one file. Whitespace in the content
+# is collapsed to single spaces unless the script was started with -KeepLineBreaks.
+function Get-FileContent {
+    param (
+        [string]$FilePath
+    )
+
+    # Strip the root directory only where it is the leading part of the path
+    $relativePath = $FilePath
+    if ($FilePath.StartsWith($DirectoryPath, [System.StringComparison]::OrdinalIgnoreCase)) {
+        $relativePath = $FilePath.Substring($DirectoryPath.Length)
+    }
+    $relativePath = $relativePath.TrimStart([char[]]'\/')
+
+    $content = Get-Content -LiteralPath $FilePath -Raw
+
+    # If the KeepLineBreaks switch is not set, minify the content
+    if (-not $KeepLineBreaks) {
+        # \s+ matches line breaks too, so one pass replaces them and squeezes spaces
+        $content = $content -replace "\s+", " "
+    }
+
+    return "FILE: $relativePath " + $content + " "
+}
+
+Write-Host "Starting code extraction from: $DirectoryPath"
+
+# The output file is written to the root directory. Its full path is kept so that it
+# is never collected as an input (for example with -AdditionalExtensions txt).
+$outputPath = "output.txt"
+$outputFullPath = Join-Path -Path $DirectoryPath -ChildPath $outputPath
+
+# Get all .java files from the /src/main/ folder only
+$srcMainPath = Join-Path -Path $DirectoryPath -ChildPath "src\main"
+if (Test-Path -LiteralPath $srcMainPath) {
+    # @(...) keeps the result an array even when zero or one file matches
+    $javaFiles = @(Get-ChildItem -LiteralPath $srcMainPath -Recurse -Filter "*.java" -File |
+                   Select-Object -ExpandProperty FullName)
+    Write-Host "Found $($javaFiles.Count) Java files in src\main folder"
+} else {
+    $javaFiles = @()
+    Write-Warning "The src\main folder does not exist in this project"
+}
+
+# Find the default files if they exist
+$foundDefaultFiles = @()
+foreach ($file in $defaultFiles) {
+    $hits = @(Get-ChildItem -LiteralPath $DirectoryPath -Recurse -Filter $file -File |
+              Select-Object -ExpandProperty FullName)
+    if ($hits.Count -gt 0) {
+        $foundDefaultFiles += $hits
+        Write-Host "Found default file: $file"
+    } else {
+        Write-Warning "Default file not found: $file"
+    }
+}
+
+# Find files with additional extensions if specified
+$extensionFiles = @()
+if ($AdditionalExtensions) {
+    foreach ($ext in $AdditionalExtensions) {
+        $extClean = $ext.TrimStart(".")
+        $foundFiles = @(Get-ChildItem -LiteralPath $DirectoryPath -Recurse -Filter "*.$extClean" -File |
+                        Where-Object { -not (Is-TestFile -FilePath $_.FullName) } |
+                        Select-Object -ExpandProperty FullName)
+
+        $extensionFiles += $foundFiles
+        Write-Host "Found $($foundFiles.Count) files with extension .$extClean"
+    }
+}
+
+# Add specified additional files if they exist
+$validAdditionalFiles = @()
+if ($AdditionalFiles) {
+    foreach ($file in $AdditionalFiles) {
+        if (Split-Path -Path $file -IsAbsolute) {
+            $filePath = $file
+        } else {
+            $filePath = Join-Path -Path $DirectoryPath -ChildPath $file
+        }
+
+        # Resolve to normalized full paths and keep files only (a folder cannot be read)
+        $resolved = @()
+        if (Test-Path -Path $filePath) {
+            $resolved = @(Resolve-Path -Path $filePath |
+                          Select-Object -ExpandProperty ProviderPath |
+                          Where-Object { Test-Path -LiteralPath $_ -PathType Leaf })
+        }
+
+        if ($resolved.Count -gt 0) {
+            $validAdditionalFiles += $resolved
+            Write-Host "Found additional file: $file"
+        } else {
+            Write-Warning "Additional file not found: $file"
+        }
+    }
+}
+
+# Combine all files, dropping duplicates and the output file itself
+$allFiles = @($javaFiles + $foundDefaultFiles + $extensionFiles + $validAdditionalFiles |
+              Where-Object { $_ -and ($_ -ne $outputFullPath) } |
+              Sort-Object -Unique)
+
+# Process each file
+$totalFiles = $allFiles.Count
+$processedFiles = 0
+$builder = New-Object System.Text.StringBuilder
+
+foreach ($file in $allFiles) {
+    $processedFiles++
+    $percent = [int](($processedFiles / $totalFiles) * 100)
+    Write-Progress -Activity "Processing Files" -Status "Processing file $processedFiles of $totalFiles" -PercentComplete $percent
+
+    [void]$builder.Append((Get-FileContent -FilePath $file))
+}
+Write-Progress -Activity "Processing Files" -Completed
+
+# Add a final newline to the file
+[void]$builder.Append("`n")
+
+# Write everything in one go, with an explicit encoding so the result does not
+# depend on the default encoding of the PowerShell version in use
+Set-Content -LiteralPath $outputPath -Value $builder.ToString() -Encoding UTF8 -NoNewline
+
+$outputSize = (Get-Item -LiteralPath $outputPath).Length / 1KB
+Write-Host "Extraction complete. $processedFiles files processed and saved to $outputPath ($([Math]::Round($outputSize, 2)) KB)"
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..73d915e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,12 @@
+# Content Extractor Scripts
+Scripts for generating project context for LLMs
+
+## ContextExtractor.ps1
+> PowerShell
+### Features:
+1. Extract all .java files
+2. Extract configured additional files
+3. Extract configured additional file types
+
+### Usage example
+> .\ContextExtractor.ps1 -AdditionalFiles "src\main\resources\static\index.html"
\ No newline at end of file