# NOTE(review): the lines below were Hugging Face Spaces status-banner residue
# ("Spaces: Running Running") captured into the file; kept as a comment so the
# module stays valid Python.
| import subprocess | |
| import sys | |
| import os | |
def ensure_playwright_chromium():
    """Ensure the Playwright Chromium browser binary is installed.

    Runs ``python -m playwright install chromium`` in a subprocess; the
    command is a no-op when the browser is already present.

    Returns:
        bool: True when the install command completed successfully,
        False when it failed (details are printed to stdout).
    """
    try:
        print("Checking and installing Playwright Chromium browser if needed...")
        subprocess.run(
            [sys.executable, "-m", "playwright", "install", "chromium"],
            check=True,           # raise CalledProcessError on non-zero exit
            capture_output=True,  # keep installer noise out of the app log
            text=True,
        )
        print("Playwright Chromium browser is ready.")
        return True
    except subprocess.CalledProcessError as e:
        print(f"Error during Playwright Chromium installation: {e}")
        print(f"Stdout: {e.stdout}")
        print(f"Stderr: {e.stderr}")
    except FileNotFoundError:
        print("Error: Python executable or Playwright module not found. Ensure your environment is set up correctly.")
    return False
| ensure_playwright_chromium() | |
| import gradio as gr | |
| import requests # Still used for other things potentially, or could be removed if not | |
| from bs4 import BeautifulSoup | |
| from bs4 import Comment | |
| import re | |
| import pandas as pd | |
| import validators | |
| # from selenium import webdriver # No longer used in the primary fetch path | |
| # from selenium.webdriver.chrome.service import Service # No longer used | |
| # from selenium.webdriver.chrome.options import Options # No longer used | |
| # from selenium.webdriver.common.by import By # No longer used | |
| import json | |
| import time | |
| import random | |
| from playwright.sync_api import sync_playwright | |
| from playwright_stealth import stealth_sync | |
# Constants
# Two-letter abbreviations of the Brazilian states; used to anchor the
# address-matching regexes in PATTERNS below.
ESTADOS_BR = ["AC", "AL", "AM", "AP", "BA", "CE", "DF", "ES", "GO", "MA", "MG", "MS", "MT", "PA", "PB", "PE", "PI", "PR", "RJ", "RN", "RO", "RR", "RS", "SC", "SE", "SP", "TO"]
# Desktop user-agent strings; one is picked at random per browser context to
# make the automated requests look less uniform.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36",
]
# Characteristic tables: the keys populate the dropdown choices in the UI
# below.  The numeric values look like appraisal adjustment coefficients,
# but nothing in this file reads them yet — NOTE(review): confirm intended
# use before relying on the numbers.  Every table maps '-' to '-' so the
# "unset" dropdown choice passes through unchanged.

# Terrain topography (slope bands).
dict_topo = {
    'plano <5%': 1,
    'aclive_leve 5% e 30%': 0.95,
    'declive_leve 5% e 30%': 0.90,
    'aclive_acentuado >30%': 0.85,
    'declive_acentuado >30%': 0.80,
    '-': '-'
}
# Regional relief.
dict_rel = {
    'plana': 1.1,
    'ondulada': 1.00,
    'montanhosa/acidentada': 0.80,
    '-': '-'
}
# Surface / flooding condition.
dict_sup = {
    'Seca': 1.00,
    'Região inundável mas não atingida': 0.90,
    'Região inundável mas atingida periodicamente': 0.70,
    'Alagada': 0.60,
    '-': '-'
}
# Best land use ("aproveitamento").
dict_apr = {
    'Loteamento': 1.00,
    'Indústria': 0.90,
    'Culturas': 0.80,
    '-': '-'
}
# Accessibility.
dict_ace = {
    'Ótima': 1.00,
    'Muito boa': 0.95,
    'Boa': 0.90,
    'Desfavorável': 0.80,
    'Má': 0.75,
    'Péssima': 0.70,
    '-': '-'
}
# Combined building age ("id", in years) and conservation state.
dict_ic = {
    'id<5_novo': 1.00,
    'id<5_bom': 0.95,
    'id<5_reparos simples': 0.80,
    'id<5_reparos importantes': 0.45,
    'id entre 6 e 10_novo': 0.95,
    'id entre 6 e 10_bom': 0.90,
    'id entre 6 e 10_reparos simples': 0.75,
    'id entre 6 e 10_reparos importantes': 0.40,
    'id entre 11 e 30_novo': 0.85,
    'id entre 11 e 30_bom': 0.80,
    'id entre 11 e 30_reparos simples': 0.65,
    'id entre 11 e 30_reparos importantes': 0.35,
    'id entre 31 e 50_novo': 0.55,
    'id entre 31 e 50_bom': 0.50,
    'id entre 31 e 50_reparos simples': 0.45,
    'id entre 31 e 50_reparos importantes': 0.25,
    'id>50_novo': 0.30,
    'id>50_bom': 0.20,
    'id>50_reparos simples': 0.15,
    'id>50_reparos importantes': 0.10,
    '-': '-'
}
# Construction standard.
dict_pad = {
    'Mínimo': 1.00,
    'Baixo': 1.15,
    'Normal c/ aspecto de baixo ': 1.30,  # trailing space kept — the key is used verbatim as a dropdown choice
    'Normal forte predominância': 1.45,
    'Normal com aspecto de alto': 1.60,
    'Alto': 1.75,
    'Luxo': 1.90,
    '-': '-'
}
# Regex patterns for mining listing pages, keyed by field name.  Within each
# list the patterns are ordered most-specific first; extract_info() returns
# the first hit.  All matching is done case-insensitively.
PATTERNS = {
    # Street address; most patterns anchor on a trailing BR state abbreviation.
    "endereco": [
        r'(?:rua|avenida|estrada|alameda|praça|travessa)\s+[\w\s\d\-,.]+?\b(?:' + "|".join(ESTADOS_BR) + r')\b',
        r'(?:endereço|localização|address)\s*:\s*[\w\s\d\-,.]+?\b(?:' + "|".join(ESTADOS_BR) + r')\b',
        r'[\w\s\d\-,.]+\b(?:' + "|".join(ESTADOS_BR) + r')\b',
        r'(?:rua|avenida|estrada|alameda|praça|travessa)\s+[\w\s\d\-,.]+',
        # "bairro - cidade/UF"; the three groups are reassembled by extract_info().
        r'([A-Za-z\s\-.À-ú]+)\s*-\s*([A-Za-z\s\-.À-ú]+)/([A-Z]{2})',  # Improved for accented chars
    ],
    # Lot frontage in meters, e.g. "10m x 25" or "10 metros de frente".
    "testada": [
        r'(\d{1,3}(?:[.,]\d{1,2})?)m?\s*[xX]\s*\d',
        r'(\d{1,3}(?:[.,]\d{1,2})?)\s*metros?\s*de\s*frente',
        r'front\s*:\s*(\d{1,3}(?:[.,]\d{1,2})?)',
        r"Metragem\s*de\s*frente\s*:\s*(\d+,\d+|\d+)\s*m",
        r"(\d+,\d+|\d+)\s*m\s*[xX]\s*\d+"
    ],
    # Asking price in R$ (Brazilian "1.234.567,89" number format).
    "valor": [
        r'R\$[\s]*<strong>[\s]*([\d.]+,[\d]{2})[\s]*<\/strong>',
        r"Valor\s*do\s*imóvel\s*R\$\s*([\d.,]+)",
        r'valor\s*:\s*R\$\s*(\d[\d\.,]*)',
        r'preço\s*:\s*R\$\s*(\d[\d\.,]*)',
        r'value\s*:\s*\$(\d[\d\.,]*)',
        r'R\$\s*(\d[\d\.,]*)',
        # r's*(\d[\d\.,]*)', # This pattern is too broad and can cause issues, commented out
    ],
    # Areas in m² (total / built / private), most to least specific.
    "area": [
        r"(\d[\d.,]*)\s*m²\s*de\s*área\s*total",
        r"(\d[\d.,]*)\s*m²\s*de\s*área\s*construída",
        r"(\d[\d.,]*)\s*m²\s*de\s*área\s*privativa",
        r'área\s*(?:do\s*terreno|total)\s*[:–-]?\s*(\d[\d.,]*)\s*m²',
        r'área\s*construída\s*[:–-]?\s*(\d[\d.,]*)\s*m²',
        r'área\s*privativa\s*[:–-]?\s*(\d[\d.,]*)\s*m²',
        r'area\s*:\s*(\d[\d.,]+)\s*m²',
        r'size\s*:\s*(\d[\d.,]+)\s*sqft',
        r"(\d+,\d+|\d+)\s*m²\s*área total",
        r"(\d+,\d+|\d+)\s*m²\s*área privativa",
        r"(\d{1,3}(?:\.\d{3})*(?:,\d{2})?)(m²|ha)\s+área total do terreno",
        r'(\d[\d.,]+)\s*m²',
    ],
    # Bedroom count.
    "dormitorios": [
        r'(\d+)\s*(?:quarto|quartos|dormit[oó]rio|dormit[oó]rios|dorm\.|dorms\.)',
        r'bedroom[s]?\s*:\s*(\d+)',
        r'dormit[oó]rio[s]?\s*[:–-]?\s*(\d+)',
        r'quarto[s]?\s*[:–-]?\s*(\d+)',
    ],
    # Bathroom count.
    "banheiros": [
        r'(\d+)\s*(?:banheiro|banheiros|wc|banho|banhos)',
        r'bathroom[s]?\s*:\s*(\d+)',
    ],
    # Parking spots.
    "vagas": [
        r'(\d+)\s*(?:vaga|vagas)(?:\s*de\s*garagem)?',
        r'parking\s*:\s*(\d+)',
        r'garagem\s*[:–-]?\s*(\d+)\s*vaga',
    ],
    # Suite count.
    "suites": [
        r'(\d+)\s*(?:su[ií]te|su[ií]tes)',
        r'suite[s]?\s*:\s*(\d+)',
    ],
}
| def extract_info(text, patterns): | |
| for pattern in patterns: | |
| match = re.search(pattern, text, re.IGNORECASE) | |
| if match: | |
| if match.groups(): | |
| if len(match.groups()) == 3 and "/" in match.group(0) and pattern == r'([A-Za-z\s\-.À-ú]+)\s*-\s*([A-Za-z\s\-.À-ú]+)/([A-Z]{2})': | |
| bairro = match.group(1).strip() | |
| cidade = match.group(2).strip() | |
| estado = match.group(3).strip() | |
| return f"{bairro} - {cidade}/{estado}" | |
| return match.group(1).strip() | |
| return match.group(0).strip() | |
| return '-' | |
def clean_text_for_testada(page_text):
    """Strip markdown-style "![Image ..." lines from *page_text*.

    Image-alt lines often carry dimension-like strings (e.g. "800x600")
    that the "testada" regexes would mistake for a lot's frontage.
    """
    image_line = r'^\s*\*?\s*\!\[Image[^\n]*\n?'
    return re.sub(image_line, '', page_text, flags=re.MULTILINE)
def extract_metadata(soup):
    """Collect <meta> tags into a dict of {lowercased property/name: content}.

    Tags missing both identifying attributes, or with an empty content
    attribute, are skipped.
    """
    collected = {}
    for tag in soup.find_all("meta"):
        key = tag.get("property") or tag.get("name")
        if key and tag.get("content"):
            collected[key.lower()] = tag["content"]
    return collected
def extract_json_scripts(soup):
    """Parse every <script type="application/ld+json"> block into Python data.

    Each script body is rebuilt without HTML comment nodes (which would
    break json.loads); blocks that still fail to parse are skipped silently.
    """
    parsed = []
    for script in soup.find_all("script", type="application/ld+json"):
        try:
            body = "".join(
                str(part) for part in script.contents
                if not isinstance(part, Comment)
            )
            parsed.append(json.loads(body))
        except (json.JSONDecodeError, TypeError):
            continue
    return parsed
def get_main_page_text(soup_obj):
    """Extract the page's main textual content as newline-separated text.

    Tries semantic containers first (<article>, <main>, role=main), then a
    set of common content-div selectors (accepted only when they hold a
    meaningful amount of text), and finally falls back to <body> or the
    whole document.
    """
    semantic_selectors = ["article", "main", "[role='main']"]  # semantic tags/attributes
    # Less specific, but common wrappers for content blocks.
    div_selectors = [
        "div[class*='content']", "div[id*='content']",
        "div[class*='main']", "div[id*='main']",
        "div[class*='body']", "div[id*='body']",
        "div[class*='post']", "div[id*='post']",
        "div[class*='listing-details']",  # listing-page specific
    ]

    target = None
    for selector in semantic_selectors:
        candidate = soup_obj.select_one(selector)
        if candidate:
            target = candidate
            break

    if target is None:
        for selector in div_selectors:
            candidate = soup_obj.select_one(selector)
            # Heuristic: require >200 chars so empty wrapper divs are skipped.
            if candidate and len(candidate.get_text(strip=True)) > 200:
                target = candidate
                break

    if target is not None:
        return target.get_text(separator='\n', strip=True)

    # No recognizable main container: fall back to <body>, then the document.
    body_tag = soup_obj.body
    if body_tag:
        return body_tag.get_text(separator='\n', strip=True)
    return soup_obj.get_text(separator='\n', strip=True)
def smart_fetch_url_info(user_input_url):
    """Fetch a listing page with headless Chromium and extract its key fields.

    Renders the page with Playwright (stealth-patched), then mines the HTML
    for address, price, areas, room counts etc. via meta tags, JSON-LD
    scripts and the regex PATTERNS.

    Args:
        user_input_url: URL typed/pasted by the user.

    Returns:
        tuple: (one-row DataFrame with the extracted listing fields,
        markdown preview text, extracted address or None, numeric price or
        '-'/None).  On failure the DataFrame is empty and the second item
        is an error message.
    """
    if not validators.url(user_input_url):
        return pd.DataFrame(), "URL inválida. Verifique e tente novamente.", None, None
    html_content = None
    page_title = ""
    final_url = user_input_url  # to store the URL after potential redirects
    with sync_playwright() as p:
        browser = None
        context = None
        page = None
        try:
            browser = p.chromium.launch(
                headless=True,
                args=[
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-dev-shm-usage',
                    '--disable-accelerated-2d-canvas',
                    '--no-first-run',
                    '--no-zygote',
                    # '--single-process',  # potentially for very constrained environments, but can be less stable
                    '--disable-gpu',
                    '--disable-blink-features=AutomationControlled'
                ]
            )
            # Brazilian locale/timezone plus a random desktop UA so the
            # rendered page matches what the target sites serve to locals.
            context = browser.new_context(
                user_agent=random.choice(USER_AGENTS),
                viewport={'width': 1920, 'height': 1080},
                locale='pt-BR',
                timezone_id='America/Sao_Paulo',
                # ignore_https_errors=True  # use with caution if SSL certs are an issue
            )
            context.set_default_navigation_timeout(60000)  # 60 seconds for navigation
            context.set_default_timeout(45000)  # 45 seconds for other operations
            page = context.new_page()
            stealth_sync(page)  # apply stealth patches
            print(f"Fetching {user_input_url} with Playwright...")
            # Try to block common resource types that slow down loading and are not needed for text
            # page.route("**/*", lambda route: route.abort() if route.request.resource_type in {"image", "stylesheet", "font", "media"} else route.continue_())
            response = page.goto(user_input_url, wait_until="domcontentloaded", timeout=60000)
            # Wait for potential dynamic content loading, or specific selectors if known
            # Example: page.wait_for_selector("body", timeout=10000)
            time.sleep(random.uniform(4, 8))  # allow time for JS execution
            html_content = page.content()
            page_title = page.title()
            final_url = page.url  # get the URL after any redirects
        except Exception as e:
            print(f"Playwright fetching error for {user_input_url}: {e}")
            return pd.DataFrame(), f"Erro ao buscar com Playwright: {e}", None, None
        finally:
            # Close page -> context -> browser in order; a failure to close
            # one must not prevent closing the others.
            if page:
                try: page.close()
                except Exception as e_page: print(f"Error closing page: {e_page}")
            if context:
                try: context.close()
                except Exception as e_ctx: print(f"Error closing context: {e_ctx}")
            if browser:
                try: browser.close()
                except Exception as e_browser: print(f"Error closing browser: {e_browser}")
    if not html_content:
        return pd.DataFrame(), "Não foi possível obter o conteúdo da página com Playwright.", None, None
    soup = BeautifulSoup(html_content, "html.parser")
    metadata = extract_metadata(soup)
    json_scripts = extract_json_scripts(soup)
    page_text = get_main_page_text(soup)
    cleaned_text_for_testada_val = clean_text_for_testada(page_text)
    # Enhanced address extraction: look for structured Schema.org address
    # objects inside the JSON-LD scripts first.
    endereco_json_parts = []
    if json_scripts:
        for script_data_list in json_scripts:  # json_scripts is a list of dicts/lists
            # Handle if script_data_list is a list itself (e.g. a graph of entities).
            items_to_check = script_data_list if isinstance(script_data_list, list) else [script_data_list]
            for script_data in items_to_check:
                if isinstance(script_data, dict):
                    # Common Schema.org types for properties.
                    if script_data.get("@type") in ["RealEstateListing", "Residence", "Place", "Apartment", "House", "SingleFamilyResidence"]:
                        addr_obj = script_data.get("address")
                        if isinstance(addr_obj, dict):
                            street = addr_obj.get("streetAddress", "")
                            locality = addr_obj.get("addressLocality", "")
                            region = addr_obj.get("addressRegion", "")
                            postal_code = addr_obj.get("postalCode", "")
                            country = addr_obj.get("addressCountry", "")
                            # Construct address string from whichever parts are present.
                            current_addr_parts = [p for p in [street, locality, region, postal_code, country] if p]
                            if current_addr_parts:
                                endereco_json_parts.append(", ".join(current_addr_parts))
                                break  # found one, assume it's the primary
            if endereco_json_parts: break
    endereco_json_val = endereco_json_parts[0] if endereco_json_parts else None
    # Address: regex over page text first, then metadata tags, then JSON-LD.
    endereco = (
        extract_info(page_text, PATTERNS["endereco"]) or
        metadata.get("og:street-address") or  # more specific OG tags
        metadata.get("og:locality") or
        metadata.get("og:region") or
        metadata.get("twitter:data1") or  # sometimes the address is here
        metadata.get("place:location:street_address") or  # Facebook Places
        endereco_json_val or
        metadata.get("og:address")  # generic fallback
    )
    if not endereco or endereco == '-':  # if regex fails, try a broader search in metadata descriptions
        description_text = metadata.get("description", "") + " " + metadata.get("og:description", "")
        endereco = extract_info(description_text, PATTERNS["endereco"])
    valor_str = extract_info(page_text, PATTERNS["valor"])
    area_str = extract_info(page_text, PATTERNS["area"])
    dorm_str = extract_info(page_text, PATTERNS["dormitorios"])
    banheiros_str = extract_info(page_text, PATTERNS["banheiros"])
    vagas_str = extract_info(page_text, PATTERNS["vagas"])
    suites_str = extract_info(page_text, PATTERNS["suites"])
    testada_str = extract_info(cleaned_text_for_testada_val, PATTERNS["testada"])
    # Data cleaning: convert Brazilian-formatted strings ("1.234,56") to
    # floats/ints, falling back to '-' when the string is not numeric.
    try:
        valor_cleaned = str(valor_str).replace('R$', '').replace('.', '').replace(',', '.').strip()
        valor_float = float(valor_cleaned) if valor_cleaned and valor_cleaned.replace('.', '', 1).replace('-', '', 1).isdigit() else '-'
    except (AttributeError, ValueError):
        valor_float = '-'
    try:
        area_match = re.search(r'(\d[\d,.]*)', str(area_str))  # get first number sequence
        area_cleaned = area_match.group(1).replace('.', '').replace(',', '.') if area_match else str(area_str)
        area_float = float(area_cleaned) if area_cleaned and area_cleaned.replace('.', '', 1).isdigit() else '-'
    except (AttributeError, ValueError):
        area_float = '-'
    def to_int_or_dash(val_str):
        # Coerce "3 vagas" / 3 / 3.0 to int; '-' when no digits are present.
        if isinstance(val_str, (int, float)): return int(val_str)
        if isinstance(val_str, str):
            cleaned_val = re.sub(r'\D', '', val_str)  # remove non-digits
            if cleaned_val.isdigit(): return int(cleaned_val)
        return '-'
    dorm_int = to_int_or_dash(dorm_str)
    banheiros_int = to_int_or_dash(banheiros_str)
    vagas_int = to_int_or_dash(vagas_str)
    suites_int = to_int_or_dash(suites_str)
    try:
        testada_cleaned = str(testada_str).replace(',', '.')
        testada_float = float(testada_cleaned) if testada_cleaned and testada_cleaned.replace('.', '', 1).isdigit() else '-'
    except (AttributeError, ValueError):
        testada_float = '-'
    result_text = f"**{page_title}**\n\nURL: {final_url}\n\n{page_text[:10000]}..."
    df = pd.DataFrame([{
        "Endereço": endereco if endereco and endereco != '-' else 'Não encontrado',
        "Área": area_float,
        "Testada": testada_float,
        "Valor": valor_float,
        "Dorm": dorm_int,
        "Banheiros": banheiros_int,
        "Vagas": vagas_int,
        "Suítes": suites_int,
        "URL": final_url,  # use final URL after redirects
        "Topografia": '-', "Relevo": '-', "Superfície": '-', "Aproveitamento": '-',
        "Acessibilidade": '-', "Idade e conservação": '-', "Padrão construtivo": '-', "Outra característica": '-'
    }])
    return df, result_text, endereco, valor_float
# Accumulator: builds up the comparison database row by row.
def _format_brl(value):
    """Format *value* as Brazilian currency ("R$ 1.234,56"); pass '-' through."""
    if isinstance(value, str):
        return value
    # The f-string yields "1,234.56"; swap separators to the Brazilian convention.
    return f'R$ {value:,.2f}'.replace(',', 'X').replace('.', ',').replace('X', '.')


def _stat_or_dash(series, stat_name):
    """Return series.<stat_name>() ('max'/'min'/'mean'/'median'), or '-' if all-NaN."""
    return getattr(series, stat_name)() if not series.isna().all() else '-'


def adicionar_ao_acumulado(df_atual, df_acumulado, topo, rel, sup, apr, ace, ic, pad, var):
    """Append the currently loaded listing to the accumulated database.

    Copies *df_atual*, tags it with a sequential "Dado N" label and the
    user-selected qualitative characteristics, computes the unit value
    VU = Valor / Área, concatenates it onto *df_acumulado* and rebuilds the
    summary-statistics markdown.

    Args:
        df_atual: One-row DataFrame with the listing just extracted.
        df_acumulado: DataFrame accumulated so far (the gr.State value).
        topo, rel, sup, apr, ace, ic, pad: Dropdown selections.
        var: Free-text "other characteristic".

    Returns:
        tuple: (accumulated DataFrame for the State, the same DataFrame for
        the visible table, markdown string with the statistics — empty when
        *df_atual* is empty).
    """
    if df_atual.empty:
        return df_acumulado, df_acumulado, ""

    df_novo = df_atual.copy()
    df_novo.insert(0, "Dado", f"Dado {len(df_acumulado)+1}")

    # Qualitative characteristics chosen in the dropdowns.
    df_novo["Topografia"] = topo
    df_novo["Relevo"] = rel
    df_novo["Superfície"] = sup
    df_novo["Aproveitamento"] = apr
    df_novo["Acessibilidade"] = ace
    df_novo["Idade e conservação"] = ic
    df_novo["Padrão construtivo"] = pad
    df_novo["Outra característica"] = var

    # Unit value (Valor / Área); '-' whenever either side is non-numeric
    # (the '-' placeholder) or Área is zero.
    df_novo["VU"] = df_novo.apply(
        lambda row: round(row["Valor"] / row["Área"], 2)
        if isinstance(row["Valor"], (int, float)) and isinstance(row["Área"], (int, float)) and row["Área"] != 0
        else '-',
        axis=1
    )

    # Reorder columns so VU sits right after Valor.
    cols = df_novo.columns.tolist()
    cols.insert(cols.index("Valor") + 1, cols.pop(cols.index("VU")))
    df_novo = df_novo[cols]

    df_acumulado = pd.concat([df_acumulado, df_novo], ignore_index=True)

    # Coerce to numeric so '-' placeholders become NaN and drop out of the stats.
    valor_numeric = pd.to_numeric(df_acumulado["Valor"], errors="coerce")
    vu_numeric = pd.to_numeric(df_acumulado["VU"], errors="coerce")

    stats_text = (
        f"**Quantidade de dados:** {len(df_acumulado)}\n\n"
        f"**Valor:**\n"
        f"- Máximo: {_format_brl(_stat_or_dash(valor_numeric, 'max'))}\n"
        f"- Mínimo: {_format_brl(_stat_or_dash(valor_numeric, 'min'))}\n"
        f"- Média: {_format_brl(_stat_or_dash(valor_numeric, 'mean'))}\n"
        f"- Mediana: {_format_brl(_stat_or_dash(valor_numeric, 'median'))}\n\n"
        f"**VU (Valor Unitário - R$/m²):**\n"
        f"- Máximo: {_format_brl(_stat_or_dash(vu_numeric, 'max'))}\n"
        f"- Mínimo: {_format_brl(_stat_or_dash(vu_numeric, 'min'))}\n"
        f"- Média: {_format_brl(_stat_or_dash(vu_numeric, 'mean'))}\n"
        f"- Mediana: {_format_brl(_stat_or_dash(vu_numeric, 'median'))}"
    )
    return df_acumulado, df_acumulado, stats_text
# Reset of the "current listing" section.
def clear_fields():
    """Reset the current-listing widgets to their initial state.

    Returns values for, in order: the URL textbox, the single-row listing
    table (all cells '-'), the extracted-text preview, the "show text"
    checkbox, the screenshot image, and the eight characteristic inputs.
    """
    columns = [
        "Endereço", "Área", "Testada", "Valor", "VU",
        "Dorm", "Banheiros", "Vagas", "Suítes", "URL",
        "Topografia", "Relevo", "Superfície", "Aproveitamento",
        "Acessibilidade", "Idade e conservação", "Padrão construtivo",
        "Outra característica",
    ]
    empty_df = pd.DataFrame({name: ['-'] for name in columns})
    characteristic_resets = ('-',) * 8
    return ("", empty_df, "", False, None) + characteristic_resets
# Remove rows whose "Dado" label was blanked out by the user.
def excluir_dados_vazios(df_acumulado):
    """Drop accumulated rows with an empty "Dado" label, then renumber.

    The user marks a row for deletion by clearing its "Dado" cell in the
    editable table; this removes those rows and relabels the remaining
    ones "Dado 1..N" so the numbering has no gaps.

    Returns the cleaned DataFrame twice (State value and visible table).
    """
    if "Dado" not in df_acumulado.columns or df_acumulado.empty:
        return df_acumulado, df_acumulado
    keep_mask = df_acumulado["Dado"].astype(str).str.strip() != ""
    cleaned = df_acumulado[keep_mask].reset_index(drop=True)
    cleaned["Dado"] = [f"Dado {i+1}" for i in range(len(cleaned))]
    return cleaned, cleaned
def toggle_output_text(show_text, result_text):
    """Toggle visibility of the extracted-text panel without losing its content."""
    visibility = gr.update(visible=show_text)
    return visibility, result_text
def take_screenshot(url, endereco, valor, filename="screenshot.png"):
    """Capture a full-page PNG of *url* with headless Chromium.

    The file is named "<sanitized address>_R$<value>.png" when both pieces
    are known, otherwise a timestamped default name is used.

    Args:
        url: Listing URL (validated before any browser work starts).
        endereco: Extracted address used to build the file name.
        valor: Extracted numeric price used to build the file name.
        filename: Fallback name; always overwritten by the logic below.

    Returns:
        str | None: Relative path of the saved screenshot, or None on failure.
    """
    if not url or not validators.url(url):
        print("URL inválida para screenshot.")
        return None  # or a placeholder image path
    with sync_playwright() as p:
        browser = None
        context = None
        page = None
        try:
            browser = p.chromium.launch(
                headless=True,
                args=[
                    '--disable-blink-features=AutomationControlled',
                    '--no-sandbox',
                    '--disable-dev-shm-usage',
                    '--disable-gpu'
                ]
            )
            context = browser.new_context(
                user_agent=random.choice(USER_AGENTS),
                viewport={'width': 1920, 'height': 1080},
                locale='pt-BR',
                timezone_id='America/Sao_Paulo',
            )
            page = context.new_page()
            stealth_sync(page)
            print(f"Taking screenshot of {url}")
            page.goto(url, timeout=60000, wait_until="networkidle")  # networkidle might be better for screenshots
            # time.sleep(random.uniform(3,6))  # extra wait if networkidle isn't enough
            if endereco and valor and str(endereco).strip() != '-' and isinstance(valor, (int, float)):
                # Strip filesystem-hostile characters from the address.
                safe_endereco = re.sub(r'[\\/*?:"<>|]', "", str(endereco)).replace(' ', '_')[:100]  # limit length
                filename = f"{safe_endereco}_R${valor:.2f}.png".replace(",", "_")  # use underscore for comma
            else:
                # Generate a more unique default name.
                timestamp = time.strftime("%Y%m%d-%H%M%S")
                filename = f"screenshot_{timestamp}.png"
            # Ensure the directory for screenshots exists if not saving to current dir
            # os.makedirs("screenshots", exist_ok=True)
            # screenshot_path = os.path.join("screenshots", filename)
            # For Gradio, a relative path is fine if it's served correctly.
            screenshot_path = filename
            page.screenshot(path=screenshot_path, full_page=True)
            print(f"Screenshot salvo como {screenshot_path}")
            return screenshot_path
        except Exception as e:
            print(f"Error during screenshot for {url}: {e}")
            return None
        finally:
            # Best-effort cleanup; a close failure must not mask the result.
            if page:
                try: page.close()
                except Exception: pass
            if context:
                try: context.close()
                except Exception: pass
            if browser:
                try: browser.close()
                except Exception: pass
# Theme
theme = gr.themes.Default(primary_hue=gr.themes.colors.yellow, secondary_hue=gr.themes.colors.blue)  # Using Default for broader compatibility
# Main app: left column of action buttons, right column with the current
# listing table, extra characteristics, and the accumulated database below.
with gr.Blocks(theme=theme, css="""
@import url('https://fonts.googleapis.com/css2?family=Quicksand:wght@400;700&display=swap');
.small-file-upload { height: 65px; text-align: center; color: black; border: 2px solid black !important; box-sizing: border-box; }
.small-file-upload span { display: none; }
.small-file-upload input[type="file"] { color: black; }
.small-file-upload label { color: black; }
.small span { font-size: 1.0em; white-space: nowrap; width: auto; display: inline-block; } /* Adjusted font size */
/* .small span dados { font-size: 0.8em; white-space: nowrap; width: auto; display: inline-block; } */ /* This specific selector might not work as intended, keep it general */
h1 { text-align: center; font-family: 'Quicksand', sans-serif; font-weight: 700; margin: 20px 0; color: black; }
.map-container { height: 600px !important; margin: 0; padding: 0; }
""") as app:
    gr.Markdown(
        "<div style='font-size: 1.5em; text-align: center;'>"  # centered title
        "<span style='color: gray;'>Pesquisa.AI - </span>"
        "<span style='color: gray;'>aval</span>"
        "<span style='color: #FFD700;'>ia</span>"  # gold color
        "<span style='color: gray;'>.se</span>"
        "</div>"
    )
    # Accumulated database lives in a gr.State so it survives interactions.
    df_acumulado_state = gr.State(pd.DataFrame(columns=["Dado", "Endereço", "Área", "Testada", "Valor", "VU", "Dorm", "Banheiros", "Vagas", "Suítes", "URL", "Topografia", "Relevo", "Superfície", "Aproveitamento", "Acessibilidade", "Idade e conservação", "Padrão construtivo", "Outra característica"]))
    with gr.Row():
        with gr.Column(scale=1, min_width=250):  # added min_width
            user_input = gr.Textbox(label="Cole a URL do anúncio aqui")
            gr.Markdown("**ANÚNCIO ATUAL**")
            submit_button = gr.Button("1. Carregar Dados do Anúncio", variant="primary", elem_id="load_button")
            screenshot_button = gr.Button("2. Print da Página do Anúncio", elem_id="print_button")
            clear_button = gr.Button("Limpar Campos Atuais", variant="stop", elem_id="clear_button")  # changed variant
            gr.Markdown("**BANCO DE DADOS**")
            add_data = gr.Button("3. Adicionar ao Banco", variant="primary", elem_id="add_db_button")
            delete_data = gr.Button("Excluir Linha Selecionada do Banco", elem_id="delete_db_button")
            # infos = gr.Button("Geolocalização", elem_id="geo_button")  # if you implement this
        with gr.Column(scale=4):  # adjusted scale
            # Single-row table showing the listing just extracted.
            output_table = gr.Dataframe(
                headers=["Endereço", "Área", "Testada", "Valor", "VU", "Dorm", "Banheiros", "Vagas", "Suítes", "URL", "Topografia", "Relevo", "Superfície", "Aproveitamento", "Acessibilidade", "Idade e conservação", "Padrão construtivo", "Outra característica"],
                datatype=["str", "number", "number", "number", "number", "number", "number", "number", "number", "str", "str", "str", "str", "str", "str", "str", "str", "str"],
                interactive=True,
                row_count=(1, "fixed"),  # let row_count determine height for single row table
                wrap=True
            )
            with gr.Accordion("Características Adicionais (para adicionar ao banco)", open=False):
                # Dropdown choices come from the coefficient tables above.
                with gr.Row():
                    topo_drop = gr.Dropdown(label="Topografia", choices=list(dict_topo.keys()), value='-', interactive=True)
                    rel_drop = gr.Dropdown(label="Relevo", choices=list(dict_rel.keys()), value='-', interactive=True)
                    sup_drop = gr.Dropdown(label="Superfície", choices=list(dict_sup.keys()), value='-', interactive=True)
                    apr_drop = gr.Dropdown(label="Aproveitamento", choices=list(dict_apr.keys()), value='-', interactive=True)
                with gr.Row():
                    ace_drop = gr.Dropdown(label="Acessibilidade", choices=list(dict_ace.keys()), value='-', interactive=True)
                    ic_drop = gr.Dropdown(label="Idade e conservação", choices=list(dict_ic.keys()), value='-', interactive=True)
                    pad_drop = gr.Dropdown(label="Padrão construtivo", choices=list(dict_pad.keys()), value='-', interactive=True)
                    var_drop = gr.Textbox(label="Outra característica", value='-', interactive=True)
            with gr.Row():
                show_text_checkbox = gr.Checkbox(label="Mostrar Texto Extraído do Anúncio", value=False, scale=4)
                screenshot_output = gr.Image(label="Print do Anúncio", type="filepath", height=200, scale=1, interactive=False)  # made non-interactive
            output_text = gr.Markdown(label="Texto Extraído (Pré-visualização)", visible=False)
    gr.Markdown("---")  # separator
    gr.Markdown("**BANCO DE DADOS ACUMULADOS**")
    acumulado_table = gr.Dataframe(
        headers=["Dado", "Endereço", "Área", "Testada", "Valor", "VU", "Dorm", "Banheiros", "Vagas", "Suítes", "URL", "Topografia", "Relevo", "Superfície", "Aproveitamento", "Acessibilidade", "Idade e conservação", "Padrão construtivo", "Outra característica"],
        datatype=["str", "str", "number", "number", "number", "number", "number", "number", "number", "number", "str", "str", "str", "str", "str", "str", "str", "str", "str"],
        interactive=True,  # set to true if you want to edit or select rows for deletion
        # height=400,  # removed height parameter
        wrap=True
    )
    # If 'acumulado_table' needs a specific height, try CSS or check Gradio docs for your version's method.
    # For Gradio 3.x: acumulado_table.style(height=400) might work if needed after the definition.
    stats_output = gr.Markdown(label="Estatísticas dos Dados Acumulados")
    # Button wiring
    extracted_endereco_state = gr.State()
    extracted_valor_state = gr.State()  # stores the numerical price for screenshot naming
    submit_button.click(
        smart_fetch_url_info,  # use the Playwright-based function
        inputs=user_input,
        outputs=[output_table, output_text, extracted_endereco_state, extracted_valor_state]
    )
    clear_button.click(
        clear_fields,
        outputs=[user_input, output_table, output_text, show_text_checkbox, screenshot_output, topo_drop, rel_drop, sup_drop, apr_drop, ace_drop, ic_drop, pad_drop, var_drop]
    )
    show_text_checkbox.change(
        toggle_output_text,
        inputs=[show_text_checkbox, output_text],  # pass current output_text to preserve it
        outputs=[output_text, output_text]  # update visibility and content
    )
    screenshot_button.click(
        take_screenshot,
        inputs=[user_input, extracted_endereco_state, extracted_valor_state],  # user_input is the original URL
        outputs=screenshot_output
    )
    add_data.click(
        adicionar_ao_acumulado,
        inputs=[output_table, df_acumulado_state, topo_drop, rel_drop, sup_drop, apr_drop, ace_drop, ic_drop, pad_drop, var_drop],
        outputs=[df_acumulado_state, acumulado_table, stats_output]
    )
    delete_data.click(
        excluir_dados_vazios,  # this function needs to know which row to delete
        inputs=acumulado_table,  # pass the editable dataframe
        outputs=[df_acumulado_state, acumulado_table]  # update state and table
    )
    # Modify excluir_dados_vazios to work with selected row from editable DataFrame.
    # For row deletion, you'd typically make 'acumulado_table' interactive and get selected index.
    # The current 'excluir_dados_vazios' removes ALL rows with empty "Dado" field, not selected ones.
    # To delete a selected row, 'acumulado_table' needs to be interactive and you'd get the selection event.
    # For simplicity, the current `excluir_dados_vazios` is kept, but it's not ideal for targeted deletion.
    # A more advanced deletion would require handling selection events from the DataFrame.
app.launch(share=True, debug=True)  # debug=True for more detailed logs during development