VOIDER committed on
Commit
29606bb
·
verified ·
1 Parent(s): f19e8d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +358 -650
app.py CHANGED
@@ -2,689 +2,397 @@ import gradio as gr
2
  import os
3
  import subprocess
4
  import tempfile
5
- import shutil
6
  import zipfile
7
- import fnmatch
8
  from pathlib import Path
9
- from pygments.lexers import guess_lexer_for_filename
10
- from pygments.util import ClassNotFound
11
- import logging
12
- import time
13
- import math
14
-
15
- # Try importing pyperclip, provide instructions if missing
16
- try:
17
- import pyperclip
18
- PYPERCLIP_AVAILABLE = True
19
- except ImportError:
20
- PYPERCLIP_AVAILABLE = False
21
- logging.warning("pyperclip library not found. 'Copy to Clipboard' functionality will be disabled. Install with: pip install pyperclip")
22
-
23
-
24
- # --- Configuration ---
25
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
26
-
27
- DEFAULT_IGNORE_PATTERNS = [
28
- # Version Control
29
- ".git/", ".gitignore", ".gitattributes", ".gitmodules", ".svn/", ".hg/",
30
- # IDE/Editor Files
31
- ".idea/", ".vscode/", "*.sublime-project", "*.sublime-workspace", ".project", ".classpath", "*.tmproj", ".DS_Store", "Thumbs.db",
32
- # Build Outputs & Dependencies
33
- "build/", "dist/", "bin/", "obj/", "out/", "target/", "*.o", "*.so", "*.dll", "*.exe", "*.class", "*.jar", "*.war", "*.ear",
34
- "node_modules/", "bower_components/", "vendor/", "*.egg-info/", "wheels/", "**/__pycache__/", "*.pyc",
35
- # Python Virtual Environments
36
- ".venv/", "venv/", "env/", ".env", "pip-cache/",
37
- # Logs & Temporary Files
38
- "*.log", "*.tmp", "*.temp", "*.swp", "*.swo", "*.bak",
39
- # OS Generated Files
40
- "._*",
41
- # Secrets (important!)
42
- "*.pem", "*.key", ".env*", "secrets.*", "credentials.*",
43
- # Common Framework/Tool cache/temp files
44
- ".pytest_cache/", ".tox/", ".mypy_cache/", ".ruff_cache/", "*.ipynb_checkpoints",
45
- # MACOS specific zip artifact
46
- "__MACOSX/",
47
- ]
48
- DEFAULT_MAX_FILE_SIZE_KB = 1024 # 1 MB limit for file content inclusion
49
- CLONE_TIMEOUT_SPARSE = 120 # seconds
50
- CLONE_TIMEOUT_STANDARD = 300 # seconds
51
- ZIP_EXTRACT_WARN_THRESHOLD = 1000 # Warn if ZIP contains more than this many files
52
- MAX_FILES_FOR_DETAILED_PROGRESS = 500 # Only show per-file progress if fewer than this many files
53
-
54
- # --- Core Logic ---
55
-
56
- def should_ignore(path_obj: Path, ignore_patterns: list[str], repo_root: Path) -> bool:
57
- """Checks if a file or directory path should be ignored based on gitignore-style patterns."""
58
  try:
59
- relative_path = path_obj.relative_to(repo_root)
60
- # Use POSIX paths for consistent pattern matching regardless of OS
61
- relative_path_str = relative_path.as_posix()
62
- except ValueError:
63
- logging.warning(f"Path {path_obj} not relative to root {repo_root}, ignoring.")
64
- return True
65
-
66
- # Optimization: Check direct name match first for common ignores like '.git'
67
- if path_obj.name in ignore_patterns:
68
- return True
69
-
70
- for pattern in ignore_patterns:
71
- pattern = pattern.strip()
72
- if not pattern or pattern.startswith('#'):
73
- continue
74
-
75
- # Ensure pattern uses POSIX separators
76
- pattern_posix = pattern.replace(os.sep, '/')
77
-
78
- # Case 1: Pattern specifies a directory (ends with '/')
79
- if pattern_posix.endswith('/'):
80
- # Match if the relative path *is* this directory or starts with it
81
- # Example: pattern "build/", path "build" or "build/foo.txt"
82
- dir_pattern = pattern_posix.rstrip('/')
83
- if relative_path_str == dir_pattern or relative_path_str.startswith(dir_pattern + '/'):
84
- return True
85
- # Also match if a *directory component* matches the name (like ignoring 'node_modules' anywhere)
86
- # Example: pattern "node_modules/", path "src/my_lib/node_modules/some_dep"
87
- if path_obj.is_dir() and path_obj.name == dir_pattern:
88
- return True
89
- # Check parent directories as well
90
- for parent in relative_path.parents:
91
- if parent.name == dir_pattern:
92
- return True
93
-
94
-
95
- # Case 2: Pattern is a file or general pattern (using fnmatch)
96
- # Use fnmatchcase for potentially case-sensitive filesystems if needed,
97
- # but fnmatch is often sufficient and more git-like on Win/Mac.
98
- if fnmatch.fnmatch(relative_path_str, pattern_posix):
99
- return True
100
- # Also match just the filename part for patterns like "*.log"
101
- if fnmatch.fnmatch(path_obj.name, pattern_posix):
102
- return True
103
-
104
-
105
- return False
106
-
107
- def is_likely_binary(file_path: Path, chunk_size=1024) -> bool:
108
- """Checks if a file is likely binary by reading a chunk."""
109
  try:
110
- with file_path.open('rb') as f:
111
- chunk = f.read(chunk_size)
112
- return b'\0' in chunk
113
- except OSError as e:
114
- logging.warning(f"Could not read file chunk for binary check {file_path}: {e}")
115
- return True
116
-
117
- def get_file_content(file_path: Path, max_size_bytes: int) -> tuple[str | None, str | None, str | None]:
118
- """Reads file content, detects language, handles size limits and encodings."""
 
 
 
 
 
119
  try:
120
- file_size = file_path.stat().st_size
121
- if file_size > max_size_bytes:
122
- kb_limit = max_size_bytes / 1024
123
- kb_actual = file_size / 1024
124
- return None, None, f"[Content skipped: File size ({kb_actual:.1f} KB) exceeds limit ({kb_limit:.1f} KB)]"
125
-
126
- if file_size == 0:
127
- return "", "", None # Empty file
128
-
129
- if is_likely_binary(file_path):
130
- return None, None, "[Content skipped: Detected as likely binary file]"
131
-
132
- content = None
133
- detected_encoding = 'utf-8'
134
- try:
135
- with file_path.open('r', encoding='utf-8') as f:
136
- content = f.read()
137
- except UnicodeDecodeError:
138
- logging.warning(f"UTF-8 decoding failed for {file_path}, trying latin-1.")
139
- detected_encoding = 'latin-1'
140
- try:
141
- with file_path.open('r', encoding='latin-1') as f:
142
- content = f.read()
143
- except Exception as e_read:
144
- logging.error(f"Error reading file {file_path} even with latin-1: {e_read}")
145
- return None, None, f"[Content skipped: Error reading file - {e_read}]"
146
- except OSError as e_os:
147
- logging.error(f"OS Error reading file {file_path}: {e_os}")
148
- return None, None, f"[Content skipped: OS Error reading file - {e_os}]"
149
-
150
- language = ""
151
- try:
152
- lexer = guess_lexer_for_filename(file_path.name, content)
153
- language = lexer.aliases[0] if lexer.aliases else lexer.name
154
- except ClassNotFound:
155
- language = "" # Plain text
156
- except Exception as e_lexer:
157
- logging.warning(f"Could not guess lexer for {file_path}: {e_lexer}")
158
- language = "" # Fallback
159
-
160
- return content, language, None
161
-
162
- except OSError as e_os:
163
- logging.error(f"OS Error processing file {file_path}: {e_os}")
164
- return None, None, f"[Content skipped: Error accessing file properties - {e_os}]"
165
  except Exception as e:
166
- logging.error(f"Unexpected error processing file {file_path}: {e}", exc_info=True)
167
- return None, None, f"[Content skipped: Unexpected error processing file - {e}]"
168
-
169
- # --- MODIFIED: Function now uses yield for status updates ---
170
- def generate_markdown_for_repo(repo_path_str: str, ignore_patterns: list[str], max_file_size_kb: int, include_content: bool):
 
 
 
 
 
 
 
171
  """
172
- Generates Markdown content for the repository structure and optionally files.
173
- Yields status updates during processing.
174
  """
175
- repo_root = Path(repo_path_str).resolve()
176
- yield f"Status: Analysing repository at {repo_root}..."
177
- logging.info(f"Starting markdown generation for: {repo_root}")
178
-
179
- md_lines = ["# Repository Analysis\n"]
180
- structure_lines = []
181
- content_lines = []
182
- max_size_bytes = max_file_size_kb * 1024
183
- files_to_process = []
184
-
185
- # --- Pre-computation: Collect all files to potentially process ---
186
- yield "Status: Scanning file structure..."
187
- all_paths = []
188
- for root, dirs, files in os.walk(repo_path_str, topdown=True):
189
- root_path = Path(root).resolve()
190
-
191
- # --- Filter ignored directories before adding paths ---
192
- # We need to check against the original dirs list before modifying it
193
- original_dirs = list(dirs)
194
- dirs[:] = [d for d in original_dirs if not should_ignore(root_path / d, ignore_patterns, repo_root)]
195
-
196
- # Add directories that are *not* ignored
197
- for d in dirs: # Add the non-ignored directory paths
198
- all_paths.append(root_path / d)
199
-
200
- # Add files that are *not* ignored
201
- for f in files:
202
- file_path = root_path / f
203
- if not should_ignore(file_path, ignore_patterns, repo_root):
204
- all_paths.append(file_path)
205
-
206
- # --- Pass 1: Build the directory structure visualization ---
207
- yield "Status: Generating directory structure..."
208
- structure_lines.append("## Directory Structure")
209
- structure_lines.append("```")
210
- structure_tree = []
211
- processed_dirs_for_structure = set()
212
-
213
- def add_to_structure(path_obj: Path, depth: int):
214
- indent = " " * depth # 4 spaces indent
215
- prefix = "└── "
216
- if path_obj.is_dir():
217
- # Add directory only if it hasn't been added via a parent walk already
218
- if path_obj not in processed_dirs_for_structure:
219
- structure_tree.append(f"{indent}{prefix}{path_obj.name}/")
220
- processed_dirs_for_structure.add(path_obj)
221
- # Recursively add children
222
- try:
223
- for item in sorted(path_obj.iterdir(), key=lambda p: (p.is_file(), p.name.lower())):
224
- if not should_ignore(item, ignore_patterns, repo_root):
225
- add_to_structure(item, depth + 1)
226
- except OSError as e:
227
- logging.warning(f"Could not access directory {path_obj}: {e}")
228
- structure_tree.append(f"{indent} └── [Error accessing directory: {e}]")
229
-
230
- elif path_obj.is_file():
231
- structure_tree.append(f"{indent}{prefix}{path_obj.name}")
232
-
233
- # Start building the structure from the root
234
- structure_tree.append(f"{repo_root.name}/")
235
- processed_dirs_for_structure.add(repo_root)
236
- try:
237
- for item in sorted(repo_root.iterdir(), key=lambda p: (p.is_file(), p.name.lower())):
238
- if not should_ignore(item, ignore_patterns, repo_root):
239
- add_to_structure(item, 1)
240
- except OSError as e:
241
- logging.error(f"Could not access repository root {repo_root}: {e}")
242
- structure_tree.append(f" └── [Error accessing repository root: {e}]")
243
-
244
-
245
- structure_lines.extend(structure_tree)
246
- structure_lines.append("```\n")
247
- yield "Status: Directory structure generated."
248
- logging.info("Directory structure built.")
249
-
250
- # --- Pass 2: Process file contents (ONLY if requested) ---
251
- files_to_render = [p for p in all_paths if p.is_file()]
252
- total_files = len(files_to_render)
253
-
254
- if include_content and total_files > 0:
255
- yield f"Status: Processing content of {total_files} file(s)..."
256
- content_lines.append("## File Contents\n")
257
- start_time = time.time()
258
- show_detailed_progress = total_files <= MAX_FILES_FOR_DETAILED_PROGRESS
259
-
260
- for i, file_path in enumerate(files_to_render):
261
- if show_detailed_progress or (i % 50 == 0 and i > 0): # Update every 50 files if many files
262
- progress_percent = (i + 1) / total_files
263
- yield f"Status: Processing file {i+1}/{total_files}: {file_path.relative_to(repo_root).as_posix()} ({progress_percent:.0%})"
264
-
265
- try:
266
- relative_path_str = file_path.relative_to(repo_root).as_posix()
267
- content_lines.append(f"### `{relative_path_str}`\n") # Use POSIX path in Markdown
268
- content, language, error_msg = get_file_content(file_path, max_size_bytes)
269
-
270
- if error_msg:
271
- content_lines.append(f"```\n{error_msg}\n```\n")
272
- elif content is not None:
273
- lang_hint = language if language else ""
274
- content_lines.append(f"```{lang_hint}\n{content}\n```\n")
275
- else:
276
- # Should generally be covered by error_msg cases, but as a fallback
277
- content_lines.append("```\n[Content not available or file is binary/empty]\n```\n")
278
-
279
- except ValueError:
280
- logging.warning(f"Path {file_path} not relative to {repo_root}, skipping content.")
281
- continue
282
- except Exception as e:
283
- logging.error(f"Unexpected error processing content for {file_path}: {e}", exc_info=True)
284
- relative_path_str = file_path.name # Fallback name
285
- try:
286
- relative_path_str = file_path.relative_to(repo_root).as_posix()
287
- except ValueError: pass
288
- content_lines.append(f"### `{relative_path_str}`\n")
289
- content_lines.append(f"```\n[ERROR processing file content: {e}]\n```\n")
290
-
291
- end_time = time.time()
292
- yield f"Status: File content processing complete ({total_files} files in {end_time - start_time:.2f}s)."
293
- logging.info(f"File content processing complete. Processed {total_files} files in {end_time - start_time:.2f} seconds.")
294
- elif not include_content:
295
- yield "Status: Skipping file content inclusion as requested."
296
- logging.info("Skipping file content inclusion as requested.")
297
- else: # include_content is True but total_files is 0
298
- yield "Status: No files found to include content for (after filtering)."
299
- logging.info("No files found to include content for (after filtering).")
300
-
301
- # Combine structure and content
302
- md_lines.extend(structure_lines)
303
- if include_content and content_lines: # Only add content section if requested and content exists
304
- md_lines.extend(content_lines)
305
-
306
- yield "Status: Markdown generation complete!"
307
- yield "".join(md_lines) # Final yield is the complete markdown
308
-
309
-
310
- # --- MODIFIED: Function is now a generator, yielding tuples for multiple outputs ---
311
- def repo_to_md_processor(input_type: str, repo_url: str | None, uploaded_zip: tempfile._TemporaryFileWrapper | None, git_branch: str | None, ignore_patterns_str: str, max_file_size_kb: int, include_content: bool):
312
  """
313
- Main processing generator function called by Gradio interface.
314
- Yields tuples: (status_update, markdown_update, file_update)
315
  """
316
- # Initialize updates for components
317
- status_update = "Status: Idle."
318
- md_update = gr.update() # Use gr.update() to signify no change initially
319
- file_update = gr.update(visible=False) # Keep file hidden initially
320
-
321
- temp_dir_obj = None
322
- repo_path = None
323
  output_md = ""
324
- output_file_path = None
325
- error_message = None
326
- start_time = time.time()
 
 
327
 
328
  try:
329
- status_update = "Status: Initializing..."
330
- yield (status_update, md_update, file_update) # Yield initial status
331
-
332
- user_patterns = {p.strip() for p in ignore_patterns_str.split(',') if p.strip()}
333
- default_patterns = set(DEFAULT_IGNORE_PATTERNS)
334
- combined_patterns = sorted(list(user_patterns.union(default_patterns)))
335
- logging.info(f"Using ignore patterns: {combined_patterns}")
336
- logging.info(f"Max file size for content: {max_file_size_kb} KB")
337
- logging.info(f"Include file content: {include_content}")
338
- if input_type == "URL" and git_branch:
339
- logging.info(f"Requested Git branch/tag: {git_branch}")
340
-
341
- # Reset output areas at the start of processing
342
- md_update = "*Processing started...*"
343
- file_update = gr.update(value=None, visible=False)
344
- yield (status_update, md_update, file_update)
345
-
346
- with tempfile.TemporaryDirectory(prefix="repo_md_") as temp_dir:
347
- logging.info(f"Created temporary directory: {temp_dir}")
348
- temp_dir_path = Path(temp_dir)
349
-
350
- if input_type == "URL":
351
- # --- URL Processing ---
352
- if not repo_url or not (repo_url.startswith("http://") or repo_url.startswith("https://") or repo_url.startswith("git@")):
353
- raise ValueError("Invalid Git URL. Must start with http(s):// or git@")
354
-
355
- status_update = f"Status: Processing URL: {repo_url}" + (f" (branch/tag: {git_branch})" if git_branch else "")
356
- yield (status_update, md_update, file_update) # Update status
357
-
358
- target_clone_path = temp_dir_path / "repo"
359
- target_clone_path.mkdir()
360
- repo_path_str = str(target_clone_path)
361
- branch_args = ["--branch", git_branch] if git_branch and git_branch.strip() else []
362
- common_args = ["--depth", "1"]
363
-
364
- try:
365
- # Try sparse checkout first
366
- status_update = "Status: Attempting efficient Git clone (sparse)..."
367
- yield (status_update, md_update, file_update)
368
- # ... [rest of sparse clone commands] ...
369
- subprocess.run(
370
- ["git", "clone"] + common_args + ["--filter=blob:none", "--no-checkout"] + branch_args + [repo_url, repo_path_str],
371
- check=True, capture_output=True, text=True, encoding='utf-8', errors='replace', timeout=CLONE_TIMEOUT_SPARSE
372
- )
373
- subprocess.run(["git", "sparse-checkout", "init", "--cone"], cwd=repo_path_str, check=True, capture_output=True, text=True)
374
- subprocess.run(["git", "checkout"], cwd=repo_path_str, check=True, capture_output=True, text=True, encoding='utf-8', errors='replace', timeout=CLONE_TIMEOUT_SPARSE)
375
- status_update = "Status: Efficient Git clone successful."
376
- yield (status_update, md_update, file_update)
377
- except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired) as e_sparse:
378
- status_update = f"Status: Efficient clone failed ({type(e_sparse).__name__}), attempting standard clone..."
379
- yield (status_update, md_update, file_update)
380
- logging.warning(f"Sparse clone failed: {e_sparse}. Output: {e_sparse.stderr if hasattr(e_sparse, 'stderr') else 'N/A'}")
381
- shutil.rmtree(target_clone_path, ignore_errors=True)
382
- target_clone_path.mkdir()
383
- try:
384
- # Fallback to standard shallow clone
385
- status_update = "Status: Attempting standard shallow clone..."
386
- yield (status_update, md_update, file_update)
387
- # ... [rest of standard clone commands] ...
388
- subprocess.run(
389
- ["git", "clone"] + common_args + branch_args + [repo_url, repo_path_str],
390
- check=True, capture_output=True, text=True, encoding='utf-8', errors='replace', timeout=CLONE_TIMEOUT_STANDARD
391
- )
392
- status_update = "Status: Standard shallow clone successful."
393
- yield (status_update, md_update, file_update)
394
- # ... [Error handling for standard clone] ...
395
- except FileNotFoundError:
396
- logging.error("Git command not found.")
397
- raise RuntimeError("Git command not found. Please install Git and ensure it's in your PATH.")
398
- except subprocess.CalledProcessError as e_std:
399
- error_detail = e_std.stderr or e_std.stdout or "No output captured."
400
- logging.error(f"Standard Git clone failed: {error_detail.strip()}")
401
- raise RuntimeError(f"Git clone failed:\n{error_detail.strip()}")
402
- except subprocess.TimeoutExpired:
403
- logging.error(f"Git clone timed out after {CLONE_TIMEOUT_STANDARD} seconds.")
404
- raise RuntimeError(f"Git clone timed out after {CLONE_TIMEOUT_STANDARD // 60} minutes.")
405
-
406
- repo_path = target_clone_path
407
-
408
- elif input_type == "Upload ZIP":
409
- # --- ZIP Processing ---
410
- if uploaded_zip is None or not hasattr(uploaded_zip, 'name'):
411
- raise ValueError("No ZIP file uploaded or invalid file object.")
412
-
413
- status_update = f"Status: Processing uploaded ZIP: {Path(uploaded_zip.name).name}"
414
- yield (status_update, md_update, file_update)
415
-
416
- target_extract_path = temp_dir_path / "extracted"
417
- target_extract_path.mkdir()
418
-
419
- try:
420
- with zipfile.ZipFile(uploaded_zip.name, 'r') as zip_ref:
421
- members = zip_ref.namelist()
422
- num_files = len(members)
423
- status_update = f"Status: Extracting {num_files} entries from ZIP..."
424
- yield (status_update, md_update, file_update)
425
- # ... [rest of zip extraction and checks] ...
426
- zip_ref.extractall(target_extract_path)
427
- status_update = "Status: ZIP extraction complete."
428
- yield (status_update, md_update, file_update)
429
- # ... [Error handling for zip extraction] ...
430
- except zipfile.BadZipFile:
431
- logging.error("Invalid or corrupted ZIP file uploaded.")
432
- raise ValueError("Invalid or corrupted ZIP file.")
433
- except Exception as e_extract:
434
- logging.error(f"Failed to extract ZIP file: {e_extract}", exc_info=True)
435
- raise RuntimeError(f"Failed to extract ZIP file: {e_extract}")
436
-
437
- # Determine repo root within extracted files
438
- # ... [Logic to find repo_path] ...
439
- extracted_items = list(target_extract_path.iterdir())
440
- filtered_items = [item for item in extracted_items if item.name not in (".DS_Store", "__MACOSX")]
441
- if len(filtered_items) == 1 and filtered_items[0].is_dir():
442
- repo_path = filtered_items[0]
443
- else:
444
- repo_path = target_extract_path
445
- logging.info(f"Using repo path: {repo_path}")
446
-
447
-
448
- else: # Should not happen with Radio button
449
- raise ValueError("Invalid input type selected.")
450
-
451
- if not repo_path or not repo_path.is_dir():
452
- raise RuntimeError(f"Could not determine valid repository path.")
453
-
454
- status_update = f"Status: Repository path identified: {repo_path.name}"
455
- yield (status_update, md_update, file_update)
456
-
457
- # --- Generate Markdown (Generator within a Generator) ---
458
- # The inner generator yields status updates which we pass through
459
- markdown_generator = generate_markdown_for_repo(str(repo_path), combined_patterns, max_file_size_kb, include_content)
460
- while True:
461
- try:
462
- # Get next item from the inner generator
463
- status_or_result = next(markdown_generator)
464
-
465
- # Check if it's a status update or the final markdown
466
- if isinstance(status_or_result, str) and status_or_result.startswith("Status:"):
467
- status_update = status_or_result # Pass through status update
468
- yield (status_update, md_update, file_update)
469
- else:
470
- # It's the final markdown content
471
- output_md = status_or_result
472
- md_update = output_md # Prepare to update markdown output
473
- break # Exit the inner loop
474
-
475
- except StopIteration:
476
- logging.error("Markdown generator finished unexpectedly without yielding final result.")
477
- raise RuntimeError("Markdown generation failed internally.")
478
-
479
- # --- Save output file ---
480
- status_update = "Status: Saving output file..."
481
- yield (status_update, md_update, file_update) # Update status
482
- with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".md", encoding='utf-8', prefix="repo_analysis_") as f:
483
- f.write(output_md)
484
- output_file_path = f.name
485
- logging.info(f"Markdown saved to temporary file: {output_file_path}")
486
-
487
- # --- Final Success Yield ---
488
- status_update = f"Status: Analysis complete! Output ready."
489
- file_update = gr.File(value=output_file_path, visible=True) # Make file downloadable
490
- yield (status_update, md_update, file_update)
491
 
 
 
492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
  except Exception as e:
494
- logging.error(f"An error occurred during processing: {e}", exc_info=True)
495
- error_message = f"An error occurred: {e}"
496
- # --- Final Error Yield ---
497
- status_update = f"Status: Error - {error_message}"
498
- md_update = f"### Operation Failed\n\nAn error occurred during processing:\n\n```\n{e}\n```" # Update markdown with error
499
- file_update = gr.update(value=None, visible=False) # Hide file component
500
- yield (status_update, md_update, file_update)
501
 
502
  finally:
503
- # Cleanup is handled by TemporaryDirectory context manager
504
- end_time = time.time()
505
- logging.info(f"Total processing time: {end_time - start_time:.2f} seconds.")
506
-
507
- # The generator stops here. The last yield sent the final state.
508
-
509
-
510
- # --- Gradio Interface ---
511
-
512
- css = """
513
- body { font-family: sans-serif; }
514
- #md_output_panel { /* Style the output panel */
515
- max-height: 80vh;
516
- }
517
- #md_output {
518
- max-height: 70vh; /* Adjust max height for content */
519
- overflow: auto;
520
- border: 1px solid #ccc;
521
- border-radius: 5px;
522
- padding: 15px;
523
- background-color: #f9f9f9;
524
- }
525
- #md_output h1 { font-size: 1.6em; border-bottom: 1px solid #eee; padding-bottom: 5px; margin-top: 0;}
526
- #md_output h2 { font-size: 1.3em; border-bottom: 1px solid #eee; padding-bottom: 5px; margin-top: 20px; }
527
- #md_output h3 { font-size: 1.1em; margin-top: 15px; margin-bottom: 5px; color: #333; }
528
- #md_output code { background-color: #eee; padding: 2px 4px; border-radius: 3px; font-size: 0.9em; }
529
- #md_output pre { background-color: #fff; padding: 10px; border-radius: 4px; border: 1px solid #ddd; white-space: pre-wrap; word-wrap: break-word; }
530
- #md_output pre > code { display: block; padding: 0; background-color: transparent; border: none; font-size: 0.9em;} /* Better code block styling */
531
-
532
- #status_box {
533
- font-size: 0.9em;
534
- color: #555;
535
- padding: 8px;
536
- border: 1px dashed #ddd;
537
- background-color: #fafafa;
538
- border-radius: 4px;
539
- min-height: 3em; /* Ensure it's visible even when short messages */
540
- margin-top: 10px;
541
- }
542
- #copy_button { /* Style the copy button */
543
- margin-left: 10px;
544
- min-width: 100px; /* Give it a bit more width */
545
- }
546
- #download_output { margin-top: 15px; }
547
- footer { display: none !important; }
548
- .gradio-container { max-width: 1360px !important; margin: auto !important; }
549
- """
550
-
551
- # --- Helper function for Copy Button ---
552
- def copy_to_clipboard(text):
553
- if PYPERCLIP_AVAILABLE and text:
554
- try:
555
- pyperclip.copy(text)
556
- logging.info("Copied output to clipboard.")
557
- return gr.update(value="Copied!", variant="secondary") # Temporary feedback
558
- except Exception as e:
559
- logging.error(f"Failed to copy to clipboard: {e}")
560
- return gr.update(value="Copy Failed", variant="stop")
561
- elif not PYPERCLIP_AVAILABLE:
562
- logging.warning("Copy attempt failed: pyperclip not installed.")
563
- return gr.update(value="Install Pyperclip", variant="stop")
564
- else: # No text to copy
565
- return gr.update(value="Nothing to Copy", variant="secondary")
566
-
567
- def reset_copy_button():
568
- # Short delay before resetting button appearance
569
- time.sleep(1.5)
570
- return gr.update(value="Copy Markdown", variant="secondary")
571
-
572
-
573
- with gr.Blocks(css=css, title="Repo Analyzer", theme=gr.themes.Soft()) as demo:
574
- gr.Markdown("# Repository Analyzer")
575
- gr.Markdown(
576
- "Enter a public Git repository URL or upload a local project folder (as a `.zip` archive) "
577
- "to generate a single Markdown file containing its structure and optionally file contents. "
578
- "Provides real-time status updates."
579
- )
580
 
581
  with gr.Row():
582
- # --- Input Column ---
583
  with gr.Column(scale=1):
584
- gr.Markdown("### Input Source & Options")
585
  input_type = gr.Radio(
586
  ["URL", "Upload ZIP"], label="Input Source", value="URL"
587
  )
588
 
589
- # URL Specific Inputs (conditionally visible)
590
- url_input = gr.Textbox(
591
- label="Git Repository URL",
592
- placeholder="e.g., https://github.com/gradio-app/gradio.git or git@github.com:user/repo.git",
593
- visible=True, interactive=True, elem_id="url-input"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594
  )
595
- git_branch_input = gr.Textbox(
596
- label="Branch / Tag (Optional)",
597
- placeholder="e.g., main, develop, v1.2.3 (leave empty for default)",
598
- visible=True, interactive=True, elem_id="git-branch-input"
 
 
599
  )
600
 
601
- # ZIP Specific Inputs (conditionally visible)
602
- zip_input = gr.File(
603
- label="Upload Local Folder (as .zip)",
604
- file_types=[".zip"],
605
- visible=False, interactive=True, elem_id="zip-input"
606
- )
 
 
 
 
 
 
 
 
 
 
 
 
607
 
608
- # --- Common Options in Accordion ---
609
- with gr.Accordion("Configuration Options", open=False):
610
- include_content_checkbox = gr.Checkbox(
611
- label="Include File Content in Output",
612
- value=True,
613
- info="Generate structure only if unchecked."
614
- )
615
- max_size_input = gr.Number(
616
- label="Max File Size for Content (KB)",
617
- value=DEFAULT_MAX_FILE_SIZE_KB, minimum=0, step=64, precision=0,
618
- info="Files larger than this won't have content included (if enabled). 0 disables content.",
619
- )
620
- ignore_input = gr.Textbox(
621
- label="Ignore Patterns (comma-separated, gitignore style)",
622
- value=", ".join(DEFAULT_IGNORE_PATTERNS),
623
- placeholder="e.g., .git/, *.log, node_modules/",
624
- info="Uses gitignore syntax. Add `/` for directories. Defaults provided.",
625
- lines=5, max_lines=15
626
- )
627
-
628
- submit_btn = gr.Button("Analyze Repository", variant="primary")
629
-
630
- gr.Markdown("### Status Updates")
631
- status_output = gr.Textbox(label="Current Status", value="Idle.", interactive=False, lines=3, elem_id="status_box")
632
-
633
-
634
- # --- Output Column ---
635
  with gr.Column(scale=2):
636
- gr.Markdown("### Generated Output")
637
- with gr.Row(elem_id="output_header_row"):
638
- copy_button = gr.Button("Copy Markdown", variant="secondary", elem_id="copy_button", visible=PYPERCLIP_AVAILABLE) # Hide if pyperclip missing
639
- download_output = gr.File(label="Download .md File", interactive=False, visible=False, elem_id="download_output", scale=1) # Take less space initially
640
-
641
- md_output = gr.Markdown(value="*Awaiting analysis results...*", elem_id="md_output", visible=True)
642
-
643
-
644
- # --- Event Handlers ---
645
-
646
- # Update visibility based on input type choice
647
- def update_input_visibility(choice):
648
- is_url = choice == "URL"
649
- return {
650
- url_input: gr.update(visible=is_url),
651
- git_branch_input: gr.update(visible=is_url),
652
- zip_input: gr.update(visible=not is_url)
653
- }
654
-
655
- input_type.change(
656
- fn=update_input_visibility,
657
- inputs=input_type,
658
- outputs=[url_input, git_branch_input, zip_input],
659
- queue=False # UI only change
660
- )
661
 
662
- # Main processing logic on submit
663
- submit_btn.click(
664
- fn=repo_to_md_processor, # The generator function
665
  inputs=[
666
- input_type, url_input, zip_input, git_branch_input,
667
- ignore_input, max_size_input, include_content_checkbox,
 
 
 
 
 
668
  ],
669
- # Outputs map to yielded values: status strings, final markdown, final file path
670
- outputs=[ status_output, md_output, download_output ],
671
- api_name="repo_to_md"
672
  )
673
 
674
- # Copy button functionality
675
- if PYPERCLIP_AVAILABLE:
676
- copy_button.click(
677
- fn=copy_to_clipboard,
678
- inputs=[md_output], # Takes the current markdown content
679
- outputs=[copy_button], # Updates its own text/appearance
680
- queue=False
681
- ).then(
682
- fn=reset_copy_button, # Function to reset button after a delay
683
- inputs=None,
684
- outputs=[copy_button],
685
- queue=False # Don't queue the reset visual change
686
- )
687
-
688
- # Launch the interface
689
  if __name__ == "__main__":
690
- demo.queue().launch(server_name="0.0.0.0", show_error=True, debug=True) # Enable queue & debug for better testing
 
2
  import os
3
  import subprocess
4
  import tempfile
 
5
  import zipfile
6
+ import shutil
7
  from pathlib import Path
8
+ import fnmatch # Для базового gitignore-подобного сопоставления (альтернатива pathspec)
9
+ from pathspec import PathSpec
10
+ from pathspec.patterns import GitWildMatchPattern
11
+
12
+ # --- Константы ---
13
+ DEFAULT_IGNORE_PATTERNS = ".git/, __pycache__/, *.pyc, *.pyo, *.o, *.so, *.a, *.dll, *.dylib, node_modules/, dist/, build/, .env, .venv/, venv/, *.log, *tmp*, *temp*"
14
+
15
+ # --- Вспомогательные функции ---
16
+
17
def get_repo_name_from_url(url):
    """Extract the repository name from a Git URL.

    The name is the last path segment of the URL with a single trailing
    ``.git`` suffix removed. Trailing slashes and surrounding whitespace are
    ignored.

    Args:
        url: Repository URL, e.g. ``https://github.com/user/repo.git``.

    Returns:
        The repository name, or ``"repository"`` when ``url`` is not a string
        or no name can be derived from it.
    """
    try:
        name = url.strip().rstrip('/').split('/')[-1]
    except AttributeError:
        # Not a string (e.g. None): fall back to a generic name.
        return "repository"
    # Strip only a trailing ".git"; a blanket replace() would also mangle
    # names that merely contain it, such as "my.github.io".
    if name.endswith('.git'):
        name = name[:-len('.git')]
    return name or "repository"
23
+
24
def is_binary(filepath):
    """Heuristically decide whether a file is binary.

    Only the first 1024 bytes are inspected; the file is reported as binary
    when that sample contains a NUL byte. Any failure to open or read the
    file is also reported as binary, so callers skip its content.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        True if the file looks binary or cannot be read, otherwise False.
    """
    sample_size = 1024
    try:
        with open(filepath, 'rb') as stream:
            head = stream.read(sample_size)
    except Exception:
        # Unreadable files are treated as binary.
        return True
    return head.find(b'\0') != -1
32
+
33
def create_gitignore_spec(ignore_patterns_str, repo_root):
    """Build a PathSpec matcher from a comma-separated pattern string.

    Patterns use gitignore syntax and are meant to be matched against paths
    relative to the repository root. ``.git/`` is appended unless ``.git/``
    or ``.git`` is already listed.

    Args:
        ignore_patterns_str: Comma-separated gitignore-style patterns.
        repo_root: Repository root; accepted for interface compatibility and
            not used by the matcher itself.

    Returns:
        A ``PathSpec`` for the patterns, or an empty ``PathSpec`` when the
        patterns cannot be parsed.
    """
    patterns = []
    for raw_pattern in ignore_patterns_str.split(','):
        cleaned = raw_pattern.strip()
        if cleaned:
            patterns.append(cleaned)

    # The Git metadata directory is always excluded.
    if not ({".git/", ".git"} & set(patterns)):
        patterns.append(".git/")

    try:
        return PathSpec.from_lines(GitWildMatchPattern, patterns)
    except Exception as e:
        print(f"Error creating PathSpec: {e}")
        # Fall back to a matcher that ignores nothing.
        return PathSpec.from_lines(GitWildMatchPattern, [])
50
+
51
+
52
def generate_markdown_from_path(
    repo_path,
    include_content,
    max_content_kb,
    ignore_patterns_str,
    progress=gr.Progress(track_tqdm=True)
):
    """
    Build a Markdown outline of a directory tree, optionally with file contents.

    Directories are listed before files, each group sorted by name. Entries
    matching the ignore patterns are skipped, and ignored directories are not
    descended into. Symbolic links are listed but never followed or read.

    Args:
        repo_path: Root directory to describe.
        include_content: When true, embed the text of each file.
        max_content_kb: Per-file size limit in KB for embedded content.
            ``0`` or ``None`` disables content entirely.
        ignore_patterns_str: Comma-separated gitignore-style patterns.
        progress: Gradio progress tracker, called as
            ``progress(fraction, desc=...)``.

    Returns:
        The generated Markdown as a single string.
    """
    root_path = Path(repo_path).resolve()
    # A cleared numeric field arrives as None; treat it like 0 (no content).
    max_content_kb = max_content_kb or 0
    max_content_bytes = max_content_kb * 1024 if include_content and max_content_kb > 0 else 0

    progress(0, desc="Parsing ignore patterns...")
    spec = create_gitignore_spec(ignore_patterns_str, root_path)

    # Rough item count, used only to scale the progress bar.
    try:
        total_items = sum(1 for _ in root_path.rglob('*'))
    except OSError:
        total_items = 0

    markdown_lines = [f"# Repository Structure: {root_path.name}\n"]
    progress(0.1, desc="Walking directory tree...")
    processed_count = 0

    def fence_for(text):
        """Return a backtick fence longer than any backtick run in text."""
        longest = run = 0
        for ch in text:
            run = run + 1 if ch == '`' else 0
            if run > longest:
                longest = run
        return '`' * max(3, longest + 1)

    def append_file_content(entry, indent):
        """Append the content of one file, or a note explaining its omission."""
        try:
            file_size = entry.stat().st_size
            if file_size > max_content_bytes:
                markdown_lines.append(f"{indent}  `[Content omitted: File size ({file_size / 1024:.2f} KB) > limit ({max_content_kb} KB)]`")
            elif file_size == 0:
                markdown_lines.append(f"{indent}  `[File is empty]`")
            elif is_binary(entry):
                markdown_lines.append(f"{indent}  `[Content omitted: Binary file detected]`")
            else:
                try:
                    # Try UTF-8 first, then fall back to Latin-1.
                    file_content = entry.read_text(encoding='utf-8')
                except UnicodeDecodeError:
                    try:
                        file_content = entry.read_text(encoding='latin-1')
                    except Exception as read_err:
                        markdown_lines.append(f"{indent}  `[Error reading file: {read_err}]`")
                        return

                # The file extension doubles as the syntax-highlighting hint.
                lang = entry.suffix.lstrip('.')
                fence = fence_for(file_content)
                markdown_lines.append(f"{indent}  {fence}{lang}\n{file_content}\n{indent}  {fence}")
        except OSError as e:
            markdown_lines.append(f"{indent}  `[Error accessing file: {e}]`")
        except Exception as e:
            markdown_lines.append(f"{indent}  `[Unexpected error processing file: {e}]`")

    def process_directory(current_path, level=0):
        """Recursively append the Markdown list items for one directory."""
        nonlocal processed_count
        indent = '  ' * level
        try:
            # Directories first, then files, each group sorted by name.
            entries = sorted(current_path.iterdir(), key=lambda p: (p.is_file(), p.name))
        except OSError as e:
            # An unreadable directory should not abort the whole run.
            markdown_lines.append(f"{indent}- `[Error listing directory: {e}]`")
            return

        for entry in entries:
            processed_count += 1
            relative_path = entry.relative_to(root_path)

            # gitignore patterns use forward slashes; a trailing '/' marks
            # directories so that directory-only patterns match.
            match_path_str = relative_path.as_posix()
            if entry.is_dir():
                match_path_str += '/'

            if spec.match_file(match_path_str):
                # Count the skipped subtree so the progress bar stays on scale.
                if entry.is_dir() and not entry.is_symlink():
                    try:
                        processed_count += sum(1 for _ in entry.rglob('*'))
                    except OSError:
                        pass  # The count is cosmetic; ignore access errors.
                continue

            progress(min(0.1 + 0.8 * (processed_count / total_items), 0.9) if total_items > 0 else 0.5,
                     desc=f"Processing: {relative_path}")

            if entry.is_symlink():
                # Links in an untrusted repository may point outside it or
                # form cycles, so they are listed but never followed.
                markdown_lines.append(f"{indent}- {entry.name} `[symlink, not followed]`")
            elif entry.is_dir():
                markdown_lines.append(f"{indent}- **{entry.name}/**")
                process_directory(entry, level + 1)
            elif entry.is_file():
                markdown_lines.append(f"{indent}- {entry.name}")
                if include_content and max_content_bytes > 0:
                    append_file_content(entry, indent)

    process_directory(root_path)

    progress(1, desc="Markdown generated!")
    return "\n".join(markdown_lines)
159
+
160
+
161
+ # --- Основная функция Gradio ---
162
+
163
class SecurityException(Exception):
    """Raised when a ZIP member would be extracted outside the target directory."""


def process_repository(
    input_type,
    repo_url,
    branch_tag,
    zip_upload,
    include_content,
    max_content_kb,
    ignore_patterns,
    progress=gr.Progress(track_tqdm=True)
):
    """
    Gradio handler: fetch a repository (Git URL or ZIP) and render it as Markdown.

    This is a generator. Every yield is a 3-tuple matching the three wired
    output components: ``(status_text, markdown_text, download_path)``.
    Intermediate yields carry only status text; the final successful yield
    carries the Markdown and the path of the written ``.md`` file. Errors are
    reported through the status text rather than raised.

    Args:
        input_type: ``"URL"`` or ``"Upload ZIP"``.
        repo_url: HTTP(S) Git repository URL (URL mode).
        branch_tag: Optional branch or tag to clone (URL mode).
        zip_upload: Uploaded ZIP, either a path string or an object exposing
            the path as ``.name`` (ZIP mode).
        include_content: Whether to embed file contents.
        max_content_kb: Per-file size limit in KB for embedded content.
        ignore_patterns: Comma-separated gitignore-style patterns; the
            defaults are used when empty.
        progress: Gradio progress tracker forwarded to the generator step.
    """
    status_updates = []

    def status_text():
        return "\n".join(status_updates)

    # Scratch space for the clone or extraction; always removed in `finally`.
    work_dir = tempfile.mkdtemp()

    try:
        if input_type == "URL":
            repo_url = (repo_url or "").strip()
            if not repo_url.startswith(('http://', 'https://')):
                raise ValueError("Please provide a valid HTTP/HTTPS Git repository URL.")

            repo_name = get_repo_name_from_url(repo_url)
            if repo_name in ("", ".", ".."):
                # Such a name would place the clone outside work_dir.
                repo_name = "repository"
            repo_path = os.path.join(work_dir, repo_name)
            status_updates.append(f"Cloning repository: {repo_url}...")
            yield status_text(), "", None

            git_command = ["git", "clone", "--depth", "1"]  # Shallow clone saves time and bandwidth.
            branch_tag = (branch_tag or "").strip()
            if branch_tag:
                git_command.extend(["--branch", branch_tag])
                status_updates.append(f"Using branch/tag: {branch_tag}")
                yield status_text(), "", None
            # "--" ends option parsing, so the URL can never be read as a flag.
            git_command.extend(["--", repo_url, repo_path])

            # check=False: the return code is inspected manually below.
            # GIT_TERMINAL_PROMPT=0 makes git fail instead of waiting for
            # credentials when the repository is private or missing.
            result = subprocess.run(
                git_command,
                capture_output=True,
                text=True,
                check=False,
                env={**os.environ, "GIT_TERMINAL_PROMPT": "0"},
            )

            if result.returncode != 0:
                raise subprocess.CalledProcessError(result.returncode, git_command, output=result.stdout, stderr=result.stderr)

            status_updates.append("Cloning successful.")
            yield status_text(), "", None

        elif input_type == "Upload ZIP":
            if zip_upload is None:
                raise ValueError("Please upload a ZIP file.")

            # Accept both a plain path string and a file wrapper with `.name`.
            zip_path = getattr(zip_upload, "name", zip_upload)
            status_updates.append(f"Processing uploaded ZIP file: {os.path.basename(zip_path)}...")
            yield status_text(), "", None

            repo_name = os.path.splitext(os.path.basename(zip_path))[0] or "repository"
            repo_path = os.path.join(work_dir, repo_name)
            os.makedirs(repo_path, exist_ok=True)

            try:
                with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                    # Zip Slip guard: every member must resolve inside the
                    # target. commonpath compares whole path components, so a
                    # sibling such as "<target>_evil" cannot pass the check.
                    target_root = os.path.realpath(repo_path)
                    for member in zip_ref.namelist():
                        member_path = os.path.realpath(os.path.join(target_root, member))
                        if os.path.commonpath([target_root, member_path]) != target_root:
                            raise SecurityException(f"Attempted Path Traversal in ZIP: {member}")
                    zip_ref.extractall(repo_path)
                status_updates.append("ZIP extraction successful.")
                yield status_text(), "", None
            except zipfile.BadZipFile:
                raise ValueError("Uploaded file is not a valid ZIP archive.")
            except SecurityException:
                # Keep the specific type so it is reported as a security error.
                raise
            except Exception as e:
                raise RuntimeError(f"Error extracting ZIP file: {e}") from e

        else:
            raise ValueError("Invalid input type selected.")

        # --- Markdown generation ---
        status_updates.append("Generating Markdown structure...")
        if include_content:
            status_updates.append(f"Including file content (Max size: {max_content_kb} KB)...")
        yield status_text(), "", None

        output_md = generate_markdown_from_path(
            repo_path,
            include_content,
            max_content_kb,
            ignore_patterns if ignore_patterns else DEFAULT_IGNORE_PATTERNS,
            progress=progress
        )

        status_updates.append("Markdown generation complete.")

        # --- Download file ---
        # Written to its own temp directory so that removing work_dir below
        # cannot delete the file the UI still has to serve.
        output_dir = tempfile.mkdtemp()
        output_filepath = os.path.join(output_dir, f"{repo_name}_structure.md")
        with open(output_filepath, "w", encoding="utf-8") as f:
            f.write(output_md)

        yield status_text(), output_md, output_filepath

    except ValueError as ve:
        status_updates.append(f"Input Error: {ve}")
        yield status_text(), "", None
    except subprocess.CalledProcessError as cpe:
        status_updates.append("Git Error: Failed to clone repository.")
        status_updates.append(f"Command: {' '.join(cpe.cmd)}")
        status_updates.append(f"Stderr: {cpe.stderr}")
        status_updates.append(f"Stdout: {cpe.stdout}")
        yield status_text(), "", None
    except SecurityException as se:
        status_updates.append(f"Security Error: {se}")
        yield status_text(), "", None
    except Exception as e:
        status_updates.append(f"An unexpected error occurred: {type(e).__name__}: {e}")
        import traceback
        status_updates.append(f"Traceback:\n{traceback.format_exc()}")  # Kept for debugging.
        yield status_text(), "", None

    finally:
        # The clone or extracted archive is no longer needed once the
        # Markdown has been produced, or once the run has failed.
        shutil.rmtree(work_dir, ignore_errors=True)
299
+
300
+
301
+ # --- Интерфейс Gradio ---
302
+
303
with gr.Blocks(title="GitHub Repo to Markdown") as demo:
    gr.Markdown("# GitHub Repository to Markdown Converter")
    gr.Markdown("Convert a GitHub repository (via URL or ZIP) into a single Markdown file representing its structure and optionally content.")

    with gr.Row():
        # Left column: input source and options.
        with gr.Column(scale=1):
            input_type = gr.Radio(
                ["URL", "Upload ZIP"], label="Input Source", value="URL"
            )

            # --- URL inputs (shown while "URL" is selected) ---
            with gr.Group(visible=True) as url_inputs:
                repo_url = gr.Textbox(
                    label="Git Repository URL",
                    placeholder="https://github.com/username/repository.git",
                    lines=1,
                )
                branch_tag = gr.Textbox(
                    label="Branch / Tag (Optional)",
                    placeholder="main (or specific branch/tag)",
                    lines=1,
                )

            # --- ZIP upload input (shown while "Upload ZIP" is selected) ---
            with gr.Group(visible=False) as zip_inputs:
                zip_upload = gr.File(label="Upload ZIP File", file_types=[".zip"])

            # --- Options ---
            gr.Markdown("### Options")
            include_content = gr.Checkbox(label="Include File Content in Output", value=False)
            max_content_kb = gr.Number(
                label="Max File Size for Content (KB)",
                value=100,
                minimum=0,
                step=10,
                info="Files larger than this won't have content included. 0 disables all content.",
                interactive=True
            )
            ignore_patterns = gr.Textbox(
                label="Ignore Patterns (comma-separated, gitignore style)",
                placeholder=".git/, node_modules/, *.log",
                value=DEFAULT_IGNORE_PATTERNS,
                lines=2,
                info="Uses gitignore syntax. Add / for directories. Defaults provided."
            )

            # --- Run button ---
            generate_button = gr.Button("Generate Markdown", variant="primary")

            # --- Input visibility ---
            def switch_input_type(choice):
                """Show only the input group that matches the selected source."""
                return {
                    url_inputs: gr.update(visible=choice == "URL"),
                    zip_inputs: gr.update(visible=choice == "Upload ZIP"),
                }
            input_type.change(switch_input_type, inputs=input_type, outputs=[url_inputs, zip_inputs])

            # --- Max-size field interactivity ---
            def toggle_max_size(include):
                """Make the max-size field editable only while content is included."""
                return gr.update(interactive=include)
            # NOTE: toggle_max_size is deliberately not wired to
            # include_content.change. Entering 0 KB is itself a valid way to
            # disable content, so the field stays editable at all times.

        # Right column: status and results.
        with gr.Column(scale=2):
            gr.Markdown("### Status & Output")
            status_box = gr.Textbox(label="Current Status", lines=5, interactive=False)
            output_markdown = gr.Textbox(
                label="Generated Markdown",
                lines=20,
                interactive=True,  # Lets the user select and copy the text.
                show_copy_button=True
            )
            download_button = gr.File(label="Download Markdown File", interactive=False)

    # --- Wire the button to the handler ---
    # process_repository is a generator; each yielded 3-tuple maps onto the
    # three outputs below.
    generate_button.click(
        process_repository,
        inputs=[
            input_type,
            repo_url,
            branch_tag,
            zip_upload,
            include_content,
            max_content_kb,
            ignore_patterns,
        ],
        outputs=[status_box, output_markdown, download_button],
    )

# Start the app when run as a script.
if __name__ == "__main__":
    # Generator handlers and gr.Progress rely on the request queue, so it is
    # enabled explicitly for Gradio versions where it is off by default.
    demo.queue().launch()