Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,8 +7,9 @@ import pathlib
|
|
| 7 |
import shutil
|
| 8 |
from pathspec import PathSpec
|
| 9 |
from pathspec.patterns import GitWildMatchPattern
|
|
|
|
| 10 |
|
| 11 |
-
# --- Configuration ---
|
| 12 |
DEFAULT_IGNORE_PATTERNS = """
|
| 13 |
# Default Ignore Patterns (Gitignore Syntax)
|
| 14 |
/.git/
|
|
@@ -33,8 +34,7 @@ INDENT_CHAR = " " # 4 spaces for indentation
|
|
| 33 |
FOLDER_ICON = "📁"
|
| 34 |
FILE_ICON = "📄"
|
| 35 |
|
| 36 |
-
# --- Core Logic ---
|
| 37 |
-
|
| 38 |
def get_repo_path(source_type, repo_url, branch_tag, zip_file_obj, progress=gr.Progress()):
|
| 39 |
"""Clones or extracts the repository, returning the local path."""
|
| 40 |
temp_dir = tempfile.mkdtemp()
|
|
@@ -84,7 +84,7 @@ def get_repo_path(source_type, repo_url, branch_tag, zip_file_obj, progress=gr.P
|
|
| 84 |
zip_path = zip_file_obj.name # Gradio provides a temp file path
|
| 85 |
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
|
| 86 |
# Check for common zip structure (single top-level dir)
|
| 87 |
-
top_level_dirs = list(set(p.split('/')[0] for p in zip_ref.namelist() if '/' in p))
|
| 88 |
extract_target = temp_dir
|
| 89 |
potential_repo_root = temp_dir
|
| 90 |
if len(top_level_dirs) == 1:
|
|
@@ -112,6 +112,10 @@ def get_repo_path(source_type, repo_url, branch_tag, zip_file_obj, progress=gr.P
|
|
| 112 |
raise ValueError("Invalid source type selected.")
|
| 113 |
|
| 114 |
if not repo_path or not repo_path.is_dir():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
raise ValueError(f"Could not determine repository root directory within: {temp_dir}")
|
| 116 |
|
| 117 |
return repo_path, temp_dir # Return both the repo content path and the parent temp dir for cleanup
|
|
@@ -120,8 +124,10 @@ def get_repo_path(source_type, repo_url, branch_tag, zip_file_obj, progress=gr.P
|
|
| 120 |
# Clean up the temporary directory on error before re-raising
|
| 121 |
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 122 |
print(f"Error in get_repo_path: {e}") # Log error
|
|
|
|
| 123 |
raise e # Re-raise the exception to be caught by the main function
|
| 124 |
|
|
|
|
| 125 |
def generate_markdown_structure(
|
| 126 |
repo_root_path: pathlib.Path,
|
| 127 |
include_content: bool,
|
|
@@ -137,10 +143,13 @@ def generate_markdown_structure(
|
|
| 137 |
# --- Prepare ignore patterns ---
|
| 138 |
# Combine default and user patterns
|
| 139 |
full_ignore_patterns = DEFAULT_IGNORE_PATTERNS.strip() + "\n" + ignore_patterns_str.strip()
|
| 140 |
-
# Filter out empty lines
|
| 141 |
patterns = [line for line in full_ignore_patterns.splitlines() if line.strip() and not line.strip().startswith('#')]
|
| 142 |
-
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
# --- Add header ---
|
| 146 |
repo_name = repo_root_path.name
|
|
@@ -150,36 +159,28 @@ def generate_markdown_structure(
|
|
| 150 |
# --- Walk through the directory ---
|
| 151 |
progress(0.6, desc="Scanning repository structure...")
|
| 152 |
files_processed = 0
|
| 153 |
-
|
|
|
|
|
|
|
| 154 |
|
| 155 |
items_scanned = 0
|
| 156 |
-
for item_path in
|
| 157 |
items_scanned += 1
|
| 158 |
if items_scanned % 50 == 0: # Update progress periodically
|
| 159 |
progress(0.6 + (0.3 * (items_scanned / max(1, total_items_estimate))), desc=f"Scanning: {item_path.name}")
|
| 160 |
|
| 161 |
-
|
| 162 |
relative_path = item_path.relative_to(repo_root_path)
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
-
# Check if the path itself
|
| 165 |
-
#
|
| 166 |
-
|
| 167 |
-
ignored = False
|
| 168 |
-
# Check root path first for patterns like '/node_modules/'
|
| 169 |
-
if spec.match_file(str(relative_path)):
|
| 170 |
-
ignored = True
|
| 171 |
-
# Check parent directories if a pattern like 'node_modules/' should match anywhere
|
| 172 |
-
current_check_path = ""
|
| 173 |
-
for part in components:
|
| 174 |
-
current_check_path = os.path.join(current_check_path, part)
|
| 175 |
-
if spec.match_file(current_check_path):
|
| 176 |
-
ignored = True
|
| 177 |
-
break
|
| 178 |
-
|
| 179 |
-
if ignored:
|
| 180 |
-
# If it's a directory, prevent os.walk from descending further if we were using it
|
| 181 |
-
# With rglob, we just skip the current item
|
| 182 |
print(f"Ignoring: {relative_path}") # Debugging
|
|
|
|
|
|
|
|
|
|
| 183 |
continue
|
| 184 |
|
| 185 |
# Calculate depth and indentation
|
|
@@ -188,13 +189,18 @@ def generate_markdown_structure(
|
|
| 188 |
|
| 189 |
# Add entry to Markdown
|
| 190 |
if item_path.is_dir():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
markdown_lines.append(f"{indent}{FOLDER_ICON} **{item_path.name}/**")
|
| 192 |
elif item_path.is_file():
|
| 193 |
markdown_lines.append(f"{indent}{FILE_ICON} {item_path.name}")
|
| 194 |
files_processed += 1
|
| 195 |
|
| 196 |
# Include file content if requested and within limits
|
| 197 |
-
if include_content and
|
| 198 |
try:
|
| 199 |
file_size = item_path.stat().st_size
|
| 200 |
if file_size == 0:
|
|
@@ -203,36 +209,53 @@ def generate_markdown_structure(
|
|
| 203 |
markdown_lines.append(f"{indent}{INDENT_CHAR}```")
|
| 204 |
elif file_size <= max_file_size_bytes:
|
| 205 |
try:
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
except UnicodeDecodeError:
|
| 223 |
-
|
|
|
|
| 224 |
except Exception as read_err:
|
| 225 |
markdown_lines.append(f"{indent}{INDENT_CHAR}[Content omitted: Error reading file - {read_err}]")
|
| 226 |
else:
|
| 227 |
-
markdown_lines.append(f"{indent}{INDENT_CHAR}[Content omitted: File size ({file_size} bytes) exceeds limit ({max_file_size_bytes} bytes)]")
|
| 228 |
except OSError as stat_err:
|
| 229 |
-
markdown_lines.append(f"{indent}{INDENT_CHAR}[Content omitted: Error accessing file - {stat_err}]")
|
| 230 |
|
| 231 |
-
elif include_content and
|
| 232 |
markdown_lines.append(f"{indent}{INDENT_CHAR}[Content omitted: Max file size set to 0 KB]")
|
| 233 |
|
| 234 |
|
| 235 |
-
# Add a newline for separation, helps readability
|
|
|
|
| 236 |
markdown_lines.append("")
|
| 237 |
|
| 238 |
|
|
@@ -254,46 +277,73 @@ def process_repo(
|
|
| 254 |
output_file_path = None
|
| 255 |
repo_root_path = None
|
| 256 |
temp_dir_to_clean = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
| 258 |
try:
|
| 259 |
progress(0, desc="Starting...")
|
|
|
|
|
|
|
| 260 |
|
| 261 |
# 1. Get Repository Path
|
| 262 |
-
yield "Fetching repository...", "", None
|
| 263 |
repo_root_path, temp_dir_to_clean = get_repo_path(
|
| 264 |
source_type, repo_url, branch_tag, zip_file_obj, progress=progress
|
| 265 |
)
|
| 266 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
|
| 268 |
# 2. Generate Markdown
|
| 269 |
-
yield "Generating Markdown structure...", "", None
|
| 270 |
markdown_content = generate_markdown_structure(
|
| 271 |
-
repo_root_path, include_content,
|
| 272 |
)
|
| 273 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
# 3. Prepare Output File
|
| 275 |
-
yield "Saving Markdown to file...",
|
| 276 |
output_filename = f"{repo_root_path.name}_structure.md"
|
|
|
|
|
|
|
|
|
|
| 277 |
# Save the file in a place Gradio can access (it manages temp files)
|
| 278 |
-
# Create a temporary file for Gradio output
|
| 279 |
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=".md", encoding='utf-8') as temp_file:
|
| 280 |
temp_file.write(markdown_content)
|
| 281 |
output_file_path = temp_file.name # Gradio needs the path to this file
|
| 282 |
|
| 283 |
-
|
|
|
|
| 284 |
|
| 285 |
except ValueError as ve:
|
| 286 |
print(f"Value Error: {ve}") # Log error
|
| 287 |
-
|
|
|
|
|
|
|
| 288 |
except subprocess.CalledProcessError as cpe:
|
| 289 |
error_detail = cpe.stderr or cpe.stdout or "Unknown git error"
|
| 290 |
print(f"Git Error: {error_detail}") # Log error
|
| 291 |
-
|
|
|
|
|
|
|
| 292 |
except Exception as e:
|
| 293 |
print(f"Unexpected Error: {e}") # Log error
|
| 294 |
-
import traceback
|
| 295 |
traceback.print_exc() # Print full traceback to logs
|
| 296 |
-
|
|
|
|
| 297 |
finally:
|
| 298 |
# 4. Cleanup
|
| 299 |
if temp_dir_to_clean:
|
|
@@ -302,7 +352,7 @@ def process_repo(
|
|
| 302 |
print("Cleanup complete.")
|
| 303 |
|
| 304 |
|
| 305 |
-
# --- Build Gradio UI ---
|
| 306 |
|
| 307 |
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="cyan")) as demo:
|
| 308 |
gr.Markdown("# GitHub Repository to Markdown Converter")
|
|
@@ -344,10 +394,10 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="cyan")
|
|
| 344 |
# Use a Textbox for preview initially, as Markdown rendering can be slow/heavy
|
| 345 |
markdown_preview_output = gr.Textbox(label="Markdown Preview (Truncated)", interactive=False, lines=20)
|
| 346 |
# Use gr.File for the final download link
|
| 347 |
-
download_output = gr.File(label="Download Markdown File", visible=False, interactive=False)
|
| 348 |
|
| 349 |
|
| 350 |
-
# --- Event Handlers ---
|
| 351 |
def toggle_input_visibility(choice):
|
| 352 |
if choice == "URL":
|
| 353 |
return gr.update(visible=True), gr.update(visible=False)
|
|
@@ -370,6 +420,8 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="cyan")
|
|
| 370 |
# api_name="generate_markdown" # Optional: for API access
|
| 371 |
)
|
| 372 |
|
| 373 |
-
# --- Launch the App ---
|
| 374 |
if __name__ == "__main__":
|
| 375 |
-
|
|
|
|
|
|
|
|
|
| 7 |
import shutil
|
| 8 |
from pathspec import PathSpec
|
| 9 |
from pathspec.patterns import GitWildMatchPattern
|
| 10 |
+
import traceback # Import traceback for better error logging
|
| 11 |
|
| 12 |
+
# --- Configuration --- (Keep as before)
|
| 13 |
DEFAULT_IGNORE_PATTERNS = """
|
| 14 |
# Default Ignore Patterns (Gitignore Syntax)
|
| 15 |
/.git/
|
|
|
|
| 34 |
FOLDER_ICON = "📁"
|
| 35 |
FILE_ICON = "📄"
|
| 36 |
|
| 37 |
+
# --- Core Logic --- (Keep get_repo_path and generate_markdown_structure as before)
|
|
|
|
| 38 |
def get_repo_path(source_type, repo_url, branch_tag, zip_file_obj, progress=gr.Progress()):
|
| 39 |
"""Clones or extracts the repository, returning the local path."""
|
| 40 |
temp_dir = tempfile.mkdtemp()
|
|
|
|
| 84 |
zip_path = zip_file_obj.name # Gradio provides a temp file path
|
| 85 |
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
|
| 86 |
# Check for common zip structure (single top-level dir)
|
| 87 |
+
top_level_dirs = list(set(p.split('/')[0] for p in zip_ref.namelist() if '/' in p and p.split('/')[0]))
|
| 88 |
extract_target = temp_dir
|
| 89 |
potential_repo_root = temp_dir
|
| 90 |
if len(top_level_dirs) == 1:
|
|
|
|
| 112 |
raise ValueError("Invalid source type selected.")
|
| 113 |
|
| 114 |
if not repo_path or not repo_path.is_dir():
|
| 115 |
+
# Add more specific debugging info here
|
| 116 |
+
print(f"Debug Info: Temp dir content: {list(os.listdir(temp_dir))}")
|
| 117 |
+
if 'potential_repo_root' in locals() and potential_repo_root != temp_dir:
|
| 118 |
+
print(f"Debug Info: Potential repo root '{potential_repo_root}' exists: {os.path.exists(potential_repo_root)}, is dir: {os.path.isdir(potential_repo_root)}")
|
| 119 |
raise ValueError(f"Could not determine repository root directory within: {temp_dir}")
|
| 120 |
|
| 121 |
return repo_path, temp_dir # Return both the repo content path and the parent temp dir for cleanup
|
|
|
|
| 124 |
# Clean up the temporary directory on error before re-raising
|
| 125 |
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 126 |
print(f"Error in get_repo_path: {e}") # Log error
|
| 127 |
+
traceback.print_exc() # Print full traceback for debugging get_repo_path issues
|
| 128 |
raise e # Re-raise the exception to be caught by the main function
|
| 129 |
|
| 130 |
+
|
| 131 |
def generate_markdown_structure(
|
| 132 |
repo_root_path: pathlib.Path,
|
| 133 |
include_content: bool,
|
|
|
|
| 143 |
# --- Prepare ignore patterns ---
|
| 144 |
# Combine default and user patterns
|
| 145 |
full_ignore_patterns = DEFAULT_IGNORE_PATTERNS.strip() + "\n" + ignore_patterns_str.strip()
|
| 146 |
+
# Filter out empty lines and comments
|
| 147 |
patterns = [line for line in full_ignore_patterns.splitlines() if line.strip() and not line.strip().startswith('#')]
|
| 148 |
+
# Create unique list while preserving order (important if later patterns override earlier ones)
|
| 149 |
+
seen = set()
|
| 150 |
+
unique_patterns = [x for x in patterns if not (x in seen or seen.add(x))]
|
| 151 |
+
spec = PathSpec.from_lines(GitWildMatchPattern, unique_patterns)
|
| 152 |
+
print(f"Using unique ignore patterns: {unique_patterns}") # Debugging
|
| 153 |
|
| 154 |
# --- Add header ---
|
| 155 |
repo_name = repo_root_path.name
|
|
|
|
| 159 |
# --- Walk through the directory ---
|
| 160 |
progress(0.6, desc="Scanning repository structure...")
|
| 161 |
files_processed = 0
|
| 162 |
+
# Need to iterate through items relative to the root for pathspec matching
|
| 163 |
+
all_items = sorted(list(repo_root_path.rglob('*')))
|
| 164 |
+
total_items_estimate = len(all_items) # More accurate estimate
|
| 165 |
|
| 166 |
items_scanned = 0
|
| 167 |
+
for item_path in all_items:
|
| 168 |
items_scanned += 1
|
| 169 |
if items_scanned % 50 == 0: # Update progress periodically
|
| 170 |
progress(0.6 + (0.3 * (items_scanned / max(1, total_items_estimate))), desc=f"Scanning: {item_path.name}")
|
| 171 |
|
|
|
|
| 172 |
relative_path = item_path.relative_to(repo_root_path)
|
| 173 |
+
# Pathspec matches against the path string relative to the root where .gitignore would be
|
| 174 |
+
# Note: the path is passed as-is (no leading '/'); root-anchored patterns like '/node_modules/' are resolved by pathspec relative to this root
|
| 175 |
+
path_str_for_match = str(relative_path)
|
| 176 |
|
| 177 |
+
# Check if the path itself should be ignored
|
| 178 |
+
# Pathspec automatically handles directory patterns (e.g., node_modules/ matches files and dirs inside)
|
| 179 |
+
if spec.match_file(path_str_for_match):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
print(f"Ignoring: {relative_path}") # Debugging
|
| 181 |
+
# If it's a directory, we don't need to manually skip recursion because
|
| 182 |
+
# rglob already gave us all paths; we just skip processing this specific path.
|
| 183 |
+
# If we were using os.walk, we'd modify the dirs list here.
|
| 184 |
continue
|
| 185 |
|
| 186 |
# Calculate depth and indentation
|
|
|
|
| 189 |
|
| 190 |
# Add entry to Markdown
|
| 191 |
if item_path.is_dir():
|
| 192 |
+
# Check if dir is empty *after* considering ignores. This is tricky with rglob.
|
| 193 |
+
# A simple heuristic: check if any non-ignored children exist directly within it.
|
| 194 |
+
# This isn't perfect but avoids complex lookahead.
|
| 195 |
+
# has_children = any(p.relative_to(repo_root_path).parts[0] == relative_path.parts[0] and not spec.match_file(str(p.relative_to(repo_root_path))) for p in all_items if p != item_path and p.parent == item_path)
|
| 196 |
+
# Simpler: Just always list the dir for now. Empty dir check is complex with ignores + rglob.
|
| 197 |
markdown_lines.append(f"{indent}{FOLDER_ICON} **{item_path.name}/**")
|
| 198 |
elif item_path.is_file():
|
| 199 |
markdown_lines.append(f"{indent}{FILE_ICON} {item_path.name}")
|
| 200 |
files_processed += 1
|
| 201 |
|
| 202 |
# Include file content if requested and within limits
|
| 203 |
+
if include_content and max_size_kb > 0: # Check > 0 explicitly
|
| 204 |
try:
|
| 205 |
file_size = item_path.stat().st_size
|
| 206 |
if file_size == 0:
|
|
|
|
| 209 |
markdown_lines.append(f"{indent}{INDENT_CHAR}```")
|
| 210 |
elif file_size <= max_file_size_bytes:
|
| 211 |
try:
|
| 212 |
+
# Attempt to detect binary files heuristically before reading large ones
|
| 213 |
+
is_binary = False
|
| 214 |
+
try:
|
| 215 |
+
# Read a small chunk to check for null bytes
|
| 216 |
+
with open(item_path, 'rb') as bf:
|
| 217 |
+
chunk = bf.read(1024)
|
| 218 |
+
if b'\x00' in chunk:
|
| 219 |
+
is_binary = True
|
| 220 |
+
except Exception:
|
| 221 |
+
# Ignore errors during binary check, proceed as text
|
| 222 |
+
pass
|
| 223 |
+
|
| 224 |
+
if is_binary:
|
| 225 |
+
markdown_lines.append(f"{indent}{INDENT_CHAR}[Content omitted: Likely a binary file (Size: {file_size} bytes)]")
|
| 226 |
+
else:
|
| 227 |
+
content = item_path.read_text(encoding='utf-8', errors='replace') # Replace errors instead of failing
|
| 228 |
+
lang = item_path.suffix.lstrip('.')
|
| 229 |
+
# Simple lang detection, can be expanded
|
| 230 |
+
if not lang: lang = "text"
|
| 231 |
+
|
| 232 |
+
markdown_lines.append(f"{indent}{INDENT_CHAR}```{lang}")
|
| 233 |
+
# Indent content lines
|
| 234 |
+
content_lines = content.splitlines()
|
| 235 |
+
# Limit output lines displayed in Markdown preview if necessary
|
| 236 |
+
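# NOTE(review): the truncated lines are appended to markdown_lines itself, so the saved file appears to be truncated too, not only the preview - confirm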
|
| 237 |
+
display_lines = content_lines[:MAX_OUTPUT_LINES]
|
| 238 |
+
for line in display_lines:
|
| 239 |
+
markdown_lines.append(f"{indent}{INDENT_CHAR}{line}")
|
| 240 |
+
if len(content_lines) > MAX_OUTPUT_LINES:
|
| 241 |
+
markdown_lines.append(f"{indent}{INDENT_CHAR}[... content truncated in preview ...]")
|
| 242 |
+
markdown_lines.append(f"{indent}{INDENT_CHAR}```")
|
| 243 |
except UnicodeDecodeError:
|
| 244 |
+
# Not expected to trigger: read_text(errors='replace') substitutes undecodable bytes instead of raising; kept as a safeguard
|
| 245 |
+
markdown_lines.append(f"{indent}{INDENT_CHAR}[Content omitted: Error decoding file as UTF-8 (Size: {file_size} bytes)]")
|
| 246 |
except Exception as read_err:
|
| 247 |
markdown_lines.append(f"{indent}{INDENT_CHAR}[Content omitted: Error reading file - {read_err}]")
|
| 248 |
else:
|
| 249 |
+
markdown_lines.append(f"{indent}{INDENT_CHAR}[Content omitted: File size ({file_size:,} bytes) exceeds limit ({max_file_size_bytes:,} bytes)]") # Added commas
|
| 250 |
except OSError as stat_err:
|
| 251 |
+
markdown_lines.append(f"{indent}{INDENT_CHAR}[Content omitted: Error accessing file stats - {stat_err}]")
|
| 252 |
|
| 253 |
+
elif include_content and max_size_kb == 0: # Content included checked, but 0 size limit
|
| 254 |
markdown_lines.append(f"{indent}{INDENT_CHAR}[Content omitted: Max file size set to 0 KB]")
|
| 255 |
|
| 256 |
|
| 257 |
+
# Add a newline for separation, helps readability only if content wasn't added (which adds ```\n)
|
| 258 |
+
# Or maybe always add it for consistency between file/dir entries
|
| 259 |
markdown_lines.append("")
|
| 260 |
|
| 261 |
|
|
|
|
| 277 |
output_file_path = None
|
| 278 |
repo_root_path = None
|
| 279 |
temp_dir_to_clean = None
|
| 280 |
+
# Ensure max_size_kb is treated as a number
|
| 281 |
+
try:
|
| 282 |
+
max_size_kb_int = int(max_size_kb) if max_size_kb is not None else 0
|
| 283 |
+
except ValueError:
|
| 284 |
+
yield "Error: Max File Size must be a number.", "", gr.update(value=None, visible=False)
|
| 285 |
+
return
|
| 286 |
+
|
| 287 |
|
| 288 |
try:
|
| 289 |
progress(0, desc="Starting...")
|
| 290 |
+
# Initial state update for all outputs
|
| 291 |
+
yield "Preparing...", "", gr.update(value=None, visible=False)
|
| 292 |
|
| 293 |
# 1. Get Repository Path
|
| 294 |
+
yield "Fetching repository...", "", gr.update(value=None, visible=False)
|
| 295 |
repo_root_path, temp_dir_to_clean = get_repo_path(
|
| 296 |
source_type, repo_url, branch_tag, zip_file_obj, progress=progress
|
| 297 |
)
|
| 298 |
+
# Check if path finding was successful before proceeding
|
| 299 |
+
if not repo_root_path:
|
| 300 |
+
# Error should have been raised in get_repo_path, but double-check
|
| 301 |
+
raise ValueError("Failed to obtain repository path.")
|
| 302 |
+
|
| 303 |
+
yield f"Repository ready at: {repo_root_path.name}", "", gr.update(value=None, visible=False)
|
| 304 |
|
| 305 |
# 2. Generate Markdown
|
| 306 |
+
yield "Generating Markdown structure...", "", gr.update(value=None, visible=False)
|
| 307 |
markdown_content = generate_markdown_structure(
|
| 308 |
+
repo_root_path, include_content, max_size_kb_int, ignore_patterns, progress=progress
|
| 309 |
)
|
| 310 |
|
| 311 |
+
# Limit preview size robustly
|
| 312 |
+
preview_limit = 3000
|
| 313 |
+
markdown_preview = markdown_content[:preview_limit]
|
| 314 |
+
if len(markdown_content) > preview_limit:
|
| 315 |
+
markdown_preview += "\n\n[... Output truncated in preview ...]"
|
| 316 |
+
|
| 317 |
# 3. Prepare Output File
|
| 318 |
+
yield "Saving Markdown to file...", markdown_preview, gr.update(value=None, visible=False)
|
| 319 |
output_filename = f"{repo_root_path.name}_structure.md"
|
| 320 |
+
# Sanitize filename slightly (replace spaces, etc.) - less critical in temp file context
|
| 321 |
+
output_filename = "".join(c if c.isalnum() or c in ('_', '-', '.') else '_' for c in output_filename)
|
| 322 |
+
|
| 323 |
# Save the file in a place Gradio can access (it manages temp files)
|
|
|
|
| 324 |
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=".md", encoding='utf-8') as temp_file:
|
| 325 |
temp_file.write(markdown_content)
|
| 326 |
output_file_path = temp_file.name # Gradio needs the path to this file
|
| 327 |
|
| 328 |
+
# *** CORRECTED YIELD USING gr.update ***
|
| 329 |
+
yield f"Done. Output file '{output_filename}' ready for download.", markdown_preview, gr.update(value=output_file_path, visible=True, label=f"Download {output_filename}")
|
| 330 |
|
| 331 |
except ValueError as ve:
|
| 332 |
print(f"Value Error: {ve}") # Log error
|
| 333 |
+
traceback.print_exc()
|
| 334 |
+
# *** CORRECTED YIELD USING gr.update ***
|
| 335 |
+
yield f"Error: {ve}", "", gr.update(value=None, visible=False)
|
| 336 |
except subprocess.CalledProcessError as cpe:
|
| 337 |
error_detail = cpe.stderr or cpe.stdout or "Unknown git error"
|
| 338 |
print(f"Git Error: {error_detail}") # Log error
|
| 339 |
+
traceback.print_exc()
|
| 340 |
+
# *** CORRECTED YIELD USING gr.update ***
|
| 341 |
+
yield f"Git command failed: {error_detail}", "", gr.update(value=None, visible=False)
|
| 342 |
except Exception as e:
|
| 343 |
print(f"Unexpected Error: {e}") # Log error
|
|
|
|
| 344 |
traceback.print_exc() # Print full traceback to logs
|
| 345 |
+
# *** CORRECTED YIELD USING gr.update ***
|
| 346 |
+
yield f"An unexpected error occurred: {e}", "", gr.update(value=None, visible=False)
|
| 347 |
finally:
|
| 348 |
# 4. Cleanup
|
| 349 |
if temp_dir_to_clean:
|
|
|
|
| 352 |
print("Cleanup complete.")
|
| 353 |
|
| 354 |
|
| 355 |
+
# --- Build Gradio UI --- (Keep as before)
|
| 356 |
|
| 357 |
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="cyan")) as demo:
|
| 358 |
gr.Markdown("# GitHub Repository to Markdown Converter")
|
|
|
|
| 394 |
# Use a Textbox for preview initially, as Markdown rendering can be slow/heavy
|
| 395 |
markdown_preview_output = gr.Textbox(label="Markdown Preview (Truncated)", interactive=False, lines=20)
|
| 396 |
# Use gr.File for the final download link
|
| 397 |
+
download_output = gr.File(label="Download Markdown File", visible=False, interactive=False) # Set interactive=False
|
| 398 |
|
| 399 |
|
| 400 |
+
# --- Event Handlers --- (Keep as before)
|
| 401 |
def toggle_input_visibility(choice):
|
| 402 |
if choice == "URL":
|
| 403 |
return gr.update(visible=True), gr.update(visible=False)
|
|
|
|
| 420 |
# api_name="generate_markdown" # Optional: for API access
|
| 421 |
)
|
| 422 |
|
| 423 |
+
# --- Launch the App --- (Keep as before)
|
| 424 |
if __name__ == "__main__":
|
| 425 |
+
# Ensure queue is enabled for HF Spaces deployment
|
| 426 |
+
# debug=True is useful for local testing, might remove/set to False for production space
|
| 427 |
+
demo.queue().launch(debug=True)
|