# Spaces:
# Sleeping
# Sleeping
| import requests | |
| from bs4 import BeautifulSoup | |
| import json | |
| import re | |
| import time | |
| from urllib.parse import urljoin, quote | |
| import logging | |
| import urllib3 | |
| from requests.adapters import HTTPAdapter | |
| from urllib3.util.retry import Retry | |
# Suppress the InsecureRequestWarning spam that results from running the
# scraper sessions with certificate verification turned off.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Module-wide logging: INFO level, one logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class PhoneDBScraper:
    """Scrape phone specifications and images from phonedb.net.

    Uses a ``requests.Session`` configured with browser-like headers, an
    HTTP retry strategy for transient errors, and SSL verification
    disabled (use with caution — see ``__init__``).
    """

    def __init__(self):
        self.base_url = "https://phonedb.net"
        self.session = requests.Session()
        # Browser-like headers reduce the chance of the site rejecting
        # automated requests.
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'Accept-Encoding': 'gzip, deflate, br',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        })
        # Retry idempotent requests on rate limiting / server errors with
        # exponential backoff.
        retry_strategy = Retry(
            total=3,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["HEAD", "GET", "OPTIONS"],  # urllib3 >= 1.26 parameter name
            backoff_factor=1,
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)
        # Disable SSL verification (use with caution)
        self.session.verify = False

    def search_phone(self, phone_name):
        """Search for a phone by name.

        Returns a list of ``{'title': ..., 'url': ...}`` dicts (at most 10
        candidates per endpoint), or an empty list when every search
        endpoint failed or matched nothing.
        """
        # phonedb.net has changed its URL scheme over time; try several.
        search_urls = [
            f"{self.base_url}/index.php?m=device&s=query&q={quote(phone_name)}",
            f"{self.base_url}/search?q={quote(phone_name)}",
            f"{self.base_url}/index.php?m=device&s=list&q={quote(phone_name)}",
        ]
        for search_url in search_urls:
            try:
                logger.info(f"Trying search URL: {search_url}")
                response = self.session.get(search_url, timeout=30)
                response.raise_for_status()
                soup = BeautifulSoup(response.content, 'html.parser')
                results = []
                # Candidate CSS selectors for result containers; the first
                # selector that matches anything wins.
                selectors = [
                    'div.device-item',
                    'div.device',
                    'div.phone-item',
                    'tr[onclick*="device"]',
                    'a[href*="device"]',
                    'a[href*="phone"]',
                    'td a[href*="index.php"]',
                ]
                search_results = []
                for selector in selectors:
                    found = soup.select(selector)
                    if found:
                        search_results.extend(found)
                        break
                # Fallback: any anchor whose href looks like a device link.
                if not search_results:
                    search_results = soup.find_all('a', href=re.compile(r'(device|phone|id=\d+)'))
                for result in search_results[:10]:  # limit to first 10 results
                    title = ""
                    link = ""
                    if result.name == 'a':
                        link = result.get('href', '')
                        title = result.get_text(strip=True) or result.get('title', '')
                    elif result.name in ['div', 'tr']:
                        link_elem = result.find('a')
                        if link_elem:
                            link = link_elem.get('href', '')
                            title = link_elem.get_text(strip=True) or result.get_text(strip=True)
                        else:
                            # No anchor: look for an onclick handler carrying
                            # a device id instead.
                            onclick = result.get('onclick', '')
                            if 'device' in onclick:
                                device_match = re.search(r'id=(\d+)', onclick)
                                if device_match:
                                    link = f"/index.php?m=device&id={device_match.group(1)}"
                                    title = result.get_text(strip=True)
                    if link and title:
                        # Collapse runs of whitespace in the title.
                        title = re.sub(r'\s+', ' ', title).strip()
                        # Normalise relative hrefs to absolute URLs.
                        if link.startswith('/'):
                            link = self.base_url + link
                        elif not link.startswith('http'):
                            link = f"{self.base_url}/{link}"
                        # Keep only results that share at least one word with
                        # the query.
                        if any(word.lower() in title.lower() for word in phone_name.split()):
                            results.append({
                                'title': title,
                                'url': link
                            })
                if results:
                    logger.info(f"Found {len(results)} results using URL: {search_url}")
                    return results
            except Exception as e:
                logger.warning(f"Search URL failed {search_url}: {e}")
                continue
        logger.error(f"All search methods failed for: {phone_name}")
        return []

    def get_phone_specs(self, phone_url):
        """Extract detailed specifications from a phone page.

        Returns a dict with keys ``name``, ``brand``, ``images`` (max 5
        URLs), ``specifications`` and ``source_url``, or ``None`` when the
        page could not be fetched/parsed.
        """
        try:
            logger.info(f"Fetching specs from: {phone_url}")
            response = self.session.get(phone_url, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            phone_data = {
                'name': '',
                'brand': '',
                'images': [],
                'specifications': {},
                'source_url': phone_url
            }
            # Phone name: first plausible candidate (> 3 chars) wins.
            title_candidates = [
                soup.find('h1'),
                soup.find('h2'),
                soup.find('title'),
                soup.find('div', class_=re.compile(r'title|name|header')),
                soup.find('td', string=re.compile(r'Model|Name', re.I))
            ]
            for candidate in title_candidates:
                if candidate:
                    title = candidate.get_text(strip=True)
                    if title and len(title) > 3:
                        phone_data['name'] = title
                        break
            # Brand is assumed to be the first word of the name.
            if phone_data['name']:
                phone_data['brand'] = phone_data['name'].split()[0]
            # Collect candidate images, skipping obvious chrome (icons,
            # logos, spacers) and duplicates.
            images = []
            img_selectors = [
                'img[src*="phone"]',
                'img[src*="device"]',
                'img[src*="mobile"]',
                'img[alt*="phone"]',
                'img[alt*="device"]',
                '.device-image img',
                '.phone-image img',
                'td img',
                'div img'
            ]
            for selector in img_selectors:
                imgs = soup.select(selector)
                for img in imgs:
                    src = img.get('src', '')
                    if src:
                        # Convert relative URLs to absolute.
                        if src.startswith('/'):
                            img_url = self.base_url + src
                        elif not src.startswith('http'):
                            img_url = f"{self.base_url}/{src}"
                        else:
                            img_url = src
                        if img_url not in images and not any(x in src.lower() for x in ['icon', 'logo', 'button', 'spacer']):
                            images.append(img_url)
            phone_data['images'] = images[:5]  # limit to 5 images
            specs = {}
            # Method 1: two-column table rows (key | value).
            spec_tables = soup.find_all('table')
            for table in spec_tables:
                rows = table.find_all('tr')
                for row in rows:
                    cells = row.find_all(['td', 'th'])
                    if len(cells) >= 2:
                        key = cells[0].get_text(strip=True)
                        value = cells[1].get_text(strip=True)
                        # Strip punctuation from keys, collapse whitespace in
                        # values; discard implausibly long entries.
                        key = re.sub(r'[^\w\s]', '', key).strip()
                        value = re.sub(r'\s+', ' ', value).strip()
                        if key and value and len(key) < 100 and len(value) < 500:
                            specs[key] = value
            # Method 2: labelled elements, either "Key: value" inline or a
            # label followed by a sibling holding the value.
            labeled_specs = soup.find_all(['dt', 'label', 'b', 'strong'])
            for label in labeled_specs:
                label_text = label.get_text(strip=True)
                if ':' in label_text:
                    key, value = label_text.split(':', 1)
                    specs[key.strip()] = value.strip()
                else:
                    sibling = label.find_next_sibling()
                    if sibling:
                        value = sibling.get_text(strip=True)
                        if value:
                            specs[label_text] = value
            # Method 3: regex over the whole page text for common specs.
            # NOTE(review): the 'β³' and 'Γ' characters below look like
            # mojibake of '″' and '×' — confirm against the upstream source.
            text_content = soup.get_text()
            spec_patterns = {
                'Display Size': r'(\d+\.?\d*)\s*(?:inch|"|β³)',
                'Display Resolution': r'(\d+)\s*[xΓ]\s*(\d+)',
                'RAM': r'(\d+)\s*GB\s*(?:RAM|Memory)',
                'Storage': r'(\d+)\s*GB\s*(?:storage|internal|ROM)',
                'Battery': r'(\d+)\s*mAh',
                'Main Camera': r'(\d+(?:\.\d+)?)\s*MP(?:\s+main|\s+primary|\s+rear)?',
                'Front Camera': r'(\d+(?:\.\d+)?)\s*MP\s*(?:front|selfie|secondary)',
                'Operating System': r'(Android|iOS)\s*[\d\.]*',
                'Processor': r'(Snapdragon|Exynos|A\d+|Kirin|MediaTek|Dimensity)\s*[\w\d\s]*',
                'Network': r'(2G|3G|4G|5G|LTE)',
                'Weight': r'(\d+)\s*(?:g|gram)',
                'Dimensions': r'(\d+\.?\d*)\s*[xΓ]\s*(\d+\.?\d*)\s*[xΓ]\s*(\d+\.?\d*)\s*mm'
            }
            for spec_name, pattern in spec_patterns.items():
                if spec_name not in specs:  # don't override table/label values
                    matches = re.findall(pattern, text_content, re.IGNORECASE)
                    if matches:
                        if spec_name == 'Display Resolution':
                            specs[spec_name] = f"{matches[0][0]}x{matches[0][1]}"
                        elif spec_name == 'Dimensions':
                            specs[spec_name] = f"{matches[0][0]}Γ{matches[0][1]}Γ{matches[0][2]} mm"
                        else:
                            specs[spec_name] = matches[0] if isinstance(matches[0], str) else str(matches[0])
            phone_data['specifications'] = specs
            logger.info(f"Extracted {len(specs)} specifications for {phone_data.get('name', 'Unknown')}")
            return phone_data
        except Exception as e:
            logger.error(f"Error extracting specs from {phone_url}: {e}")
            return None

    def scrape_phone_by_name(self, phone_name, get_first_result=True):
        """Search for *phone_name* and scrape its spec page(s).

        Returns a single phone dict when ``get_first_result`` is True,
        otherwise a list of phone dicts. Returns ``None`` (or an empty
        list) when nothing could be scraped.
        """
        logger.info(f"Searching for: {phone_name}")
        search_results = self.search_phone(phone_name)
        if not search_results:
            logger.warning(f"No results found for: {phone_name}")
            return None
        results = []
        targets = [search_results[0]] if get_first_result else search_results
        for result in targets:
            logger.info(f"Scraping: {result['title']}")
            phone_data = self.get_phone_specs(result['url'])
            if phone_data:
                results.append(phone_data)
            # Be respectful with requests
            time.sleep(1)
        return results[0] if get_first_result and results else results

    def scrape_multiple_phones(self, phone_names):
        """Scrape each name in *phone_names*; skip failures and return the
        list of successfully scraped phone dicts."""
        all_phones = []
        for phone_name in phone_names:
            try:
                phone_data = self.scrape_phone_by_name(phone_name)
                if phone_data:
                    all_phones.append(phone_data)
                time.sleep(2)  # be respectful between requests
            except Exception as e:
                logger.error(f"Error scraping {phone_name}: {e}")
                continue
        return all_phones

    def save_to_json(self, data, filename):
        """Save *data* to *filename* as pretty-printed UTF-8 JSON."""
        try:
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
            # Fixed: previously logged the literal "(unknown)" instead of
            # the actual destination filename.
            logger.info(f"Data saved to {filename}")
        except Exception as e:
            logger.error(f"Error saving to JSON: {e}")
# Example usage with error handling and alternative sites.
# NOTE(review): this `main` is shadowed by the later `def main()` defined
# further down in this file, so it is effectively dead code — confirm which
# entry point is intended.
def main():
    """Demonstrate the PhoneDB scraper on a single phone and a batch."""
    scraper = PhoneDBScraper()

    # Example 1: single phone.
    phone_name = "iPhone 15 Pro"
    print(f"Attempting to scrape: {phone_name}")
    result = scraper.scrape_phone_by_name(phone_name)
    if result:
        print(f"β Successfully scraped {result['name']}")
        print(f"Found {len(result['specifications'])} specifications")
        print(f"Found {len(result['images'])} images")
        print(json.dumps(result, indent=2))
        scraper.save_to_json(result, f"{phone_name.replace(' ', '_')}_specs.json")
    else:
        for line in (
            f"β Failed to scrape {phone_name}",
            "This might be due to:",
            "1. PhoneDB.net blocking automated requests",
            "2. Phone not found in their database",
            "3. Site structure changes",
            "\nAlternative solutions:",
            "- Try with a different phone name",
            "- Use a VPN if blocked by IP",
            "- Consider using alternative sites like GSMArena",
        ):
            print(line)

    # Example 2: several phones in one run.
    phone_list = [
        "Samsung Galaxy S24",
        "Google Pixel 8",
        "OnePlus 12",
    ]
    print(f"\nTesting multiple phones: {phone_list}")
    results = scraper.scrape_multiple_phones(phone_list)
    if results:
        scraper.save_to_json(results, "multiple_phones_specs.json")
        print(f"β Successfully scraped {len(results)}/{len(phone_list)} phones")
        for phone in results:
            print(f"- {phone['name']}: {len(phone['specifications'])} specs, {len(phone['images'])} images")
    else:
        print("β No phones were successfully scraped")
# Enhanced GSMArena scraper as main alternative
class GSMArenaScraperAlternative:
    """Enhanced GSMArena scraper with full functionality.

    Mirrors the PhoneDBScraper interface (search, spec extraction, batch
    scraping, JSON export) against www.gsmarena.com.
    """

    def __init__(self):
        self.base_url = "https://www.gsmarena.com"
        self.session = requests.Session()
        # Browser-like headers reduce the chance of being blocked.
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
        })

    def search_phone(self, phone_name):
        """Search GSMArena for *phone_name*.

        Returns a list of ``{'title': ..., 'url': ...}`` dicts (at most 5
        per makers block), or an empty list on failure.
        """
        search_url = f"{self.base_url}/results.php3"
        params = {'sQuickSearch': 'yes', 'sName': phone_name}
        try:
            logger.info(f"Searching GSMArena for: {phone_name}")
            response = self.session.get(search_url, params=params, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            results = []
            # Search hits are rendered inside div.makers blocks.
            makers = soup.find_all('div', class_='makers')
            for maker in makers:
                links = maker.find_all('a')
                for link in links[:5]:  # limit results
                    href = link.get('href', '')
                    title = link.get_text(strip=True)
                    # Space-insensitive containment check against the query.
                    if href and title and phone_name.lower().replace(' ', '') in title.lower().replace(' ', ''):
                        full_url = self.base_url + '/' + href if not href.startswith('http') else href
                        results.append({
                            'title': title,
                            'url': full_url
                        })
            logger.info(f"Found {len(results)} results on GSMArena")
            return results
        except Exception as e:
            logger.error(f"GSMArena search failed: {e}")
            return []

    def get_phone_specs(self, phone_url):
        """Extract detailed specifications from a GSMArena phone page.

        Returns a dict with keys ``name``, ``brand``, ``images`` (max 5),
        ``specifications`` and ``source_url``, or ``None`` on failure.
        """
        try:
            logger.info(f"Fetching specs from GSMArena: {phone_url}")
            response = self.session.get(phone_url, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            phone_data = {
                'name': '',
                'brand': '',
                'images': [],
                'specifications': {},
                'source_url': phone_url
            }
            # Phone name: GSMArena-specific heading first, generic fallbacks after.
            title_elem = soup.find('h1', class_='specs-phone-name-title')
            if not title_elem:
                title_elem = soup.find('h1') or soup.find('title')
            if title_elem:
                phone_data['name'] = title_elem.get_text(strip=True)
                phone_data['brand'] = phone_data['name'].split()[0] if phone_data['name'] else ''
            # Images: main photo plus any carousel/gallery shots.
            images = []
            main_img_container = soup.find('div', class_='specs-photo-main')
            if main_img_container:
                img = main_img_container.find('img')
                if img and img.get('src'):
                    img_url = urljoin(phone_url, img['src'])
                    images.append(img_url)
            carousel = soup.find('div', class_='carousel-item') or soup.find('div', class_='specs-photos')
            if carousel:
                for img in carousel.find_all('img'):
                    src = img.get('src', '')
                    if src:
                        img_url = urljoin(phone_url, src)
                        if img_url not in images:
                            images.append(img_url)
            phone_data['images'] = images[:5]
            specs = {}
            # GSMArena renders specs in tables with cellspacing="0"; each
            # table's category comes from the preceding th/h2.
            spec_tables = soup.find_all('table', cellspacing='0')
            for table in spec_tables:
                category = ''
                category_elem = table.find_previous('th') or table.find_previous('h2')
                if category_elem:
                    category = category_elem.get_text(strip=True)
                rows = table.find_all('tr')
                for row in rows:
                    cells = row.find_all(['td', 'th'])
                    if len(cells) >= 2:
                        key = cells[0].get_text(strip=True)
                        value = cells[1].get_text(strip=True)
                        # Strip punctuation from keys, collapse whitespace in values.
                        key = re.sub(r'[^\w\s]', '', key).strip()
                        value = re.sub(r'\s+', ' ', value).strip()
                        if key and value and len(key) < 100:
                            # Prefix with the category when it is short enough
                            # to be a real section heading.
                            final_key = f"{category} - {key}" if category and len(category) < 30 else key
                            specs[final_key] = value
            # Also mine "Key: value" entries out of spec/detail lists.
            detail_lists = soup.find_all(['ul', 'li'], class_=re.compile(r'spec|detail'))
            for detail_list in detail_lists:
                items = detail_list.find_all('li') if detail_list.name == 'ul' else [detail_list]
                for item in items:
                    text = item.get_text(strip=True)
                    if ':' in text:
                        parts = text.split(':', 1)
                        if len(parts) == 2:
                            key, value = parts
                            specs[key.strip()] = value.strip()
            # Finally, regex the page text for key specs not already found.
            page_text = soup.get_text()
            key_patterns = {
                'Display Size': r'(\d+\.?\d*)\s*(?:inch|")\s*display',
                'Display Resolution': r'(\d+)\s*[xΓ]\s*(\d+)\s*pixels',
                'RAM': r'(\d+)\s*GB\s*RAM',
                'Storage': r'(\d+)\s*GB\s*(?:storage|internal)',
                'Battery Capacity': r'(\d+)\s*mAh',
                'Main Camera': r'(\d+(?:\.\d+)?)\s*MP\s*(?:main|primary|rear)',
                'Front Camera': r'(\d+(?:\.\d+)?)\s*MP\s*front',
                'Operating System': r'(Android|iOS)\s*([\d\.]+)?',
                'Chipset': r'(Snapdragon|Exynos|A\d+|Kirin|MediaTek|Dimensity)\s*([\w\d\s]+)?',
                'Weight': r'(\d+)\s*g\s*weight',
                'Launch Date': r'(January|February|March|April|May|June|July|August|September|October|November|December)\s*(\d{4})'
            }
            for spec_name, pattern in key_patterns.items():
                if spec_name not in specs:
                    match = re.search(pattern, page_text, re.IGNORECASE)
                    if match:
                        if spec_name == 'Display Resolution':
                            # NOTE(review): 'Γ' looks like mojibake of '×' —
                            # confirm the intended separator character.
                            specs[spec_name] = f"{match.group(1)}Γ{match.group(2)}"
                        elif spec_name == 'Launch Date':
                            specs[spec_name] = f"{match.group(1)} {match.group(2)}"
                        else:
                            specs[spec_name] = match.group(0)
            phone_data['specifications'] = specs
            logger.info(f"Extracted {len(specs)} specifications for {phone_data.get('name', 'Unknown')}")
            return phone_data
        except Exception as e:
            logger.error(f"Error extracting GSMArena specs from {phone_url}: {e}")
            return None

    def scrape_phone_by_name(self, phone_name, get_first_result=True):
        """Search GSMArena and scrape the matching spec page(s).

        Returns a single phone dict when ``get_first_result`` is True,
        otherwise a list of phone dicts; ``None``/empty on failure.
        """
        search_results = self.search_phone(phone_name)
        if not search_results:
            logger.warning(f"No results found for: {phone_name}")
            return None
        results = []
        targets = [search_results[0]] if get_first_result else search_results
        for result in targets:
            logger.info(f"Scraping: {result['title']}")
            phone_data = self.get_phone_specs(result['url'])
            if phone_data:
                results.append(phone_data)
            time.sleep(2)  # be respectful
        return results[0] if get_first_result and results else results

    def scrape_multiple_phones(self, phone_names):
        """Scrape each name in *phone_names* from GSMArena; skip failures
        and return the list of successfully scraped phone dicts."""
        all_phones = []
        for phone_name in phone_names:
            try:
                phone_data = self.scrape_phone_by_name(phone_name)
                if phone_data:
                    all_phones.append(phone_data)
                time.sleep(3)  # be respectful between requests
            except Exception as e:
                logger.error(f"Error scraping {phone_name}: {e}")
                continue
        return all_phones

    def save_to_json(self, data, filename):
        """Save *data* to *filename* as pretty-printed UTF-8 JSON."""
        try:
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
            # Fixed: previously logged the literal "(unknown)" instead of
            # the actual destination filename.
            logger.info(f"Data saved to {filename}")
        except Exception as e:
            logger.error(f"Error saving to JSON: {e}")
def test_alternative_scraper():
    """Exercise the GSMArena scraper on one phone, then a small batch."""
    print("\n" + "="*50)
    print("Testing Enhanced GSMArena Scraper")
    print("="*50)
    scraper = GSMArenaScraperAlternative()

    # Single-phone run.
    phone_name = "iPhone 15 Pro"
    print(f"Testing single phone: {phone_name}")
    result = scraper.scrape_phone_by_name(phone_name)
    if result:
        print(f"β Successfully scraped: {result['name']}")
        print(f"π± Found {len(result['specifications'])} specifications")
        print(f"πΌοΈ Found {len(result['images'])} images")
        # Show the first matching entry for each headline spec.
        key_specs = ['Display Size', 'RAM', 'Storage', 'Battery Capacity', 'Main Camera']
        print("\nπ Key Specifications:")
        for spec in key_specs:
            hit = next(
                ((key, value) for key, value in result['specifications'].items()
                 if spec.lower() in key.lower()),
                None,
            )
            if hit:
                print(f" β’ {hit[0]}: {hit[1]}")
        scraper.save_to_json(result, f"{phone_name.replace(' ', '_')}_gsmarena_specs.json")
    else:
        print(f"β Failed to scrape {phone_name}")

    # Batch run.
    print(f"\n" + "-"*40)
    print("Testing Multiple Phones")
    print("-"*40)
    phone_list = ["Samsung Galaxy S24", "Google Pixel 8"]
    results = scraper.scrape_multiple_phones(phone_list)
    if results:
        print(f"β Successfully scraped {len(results)}/{len(phone_list)} phones")
        scraper.save_to_json(results, "multiple_phones_gsmarena_specs.json")
        for phone in results:
            print(f"π± {phone['name']}: {len(phone['specifications'])} specs, {len(phone['images'])} images")
    else:
        print("β No phones were successfully scraped")
# Main function with both scrapers
def main():
    """Try the PhoneDB scraper first; fall back to GSMArena on failure."""
    print("π Phone Specifications Scraper")
    print("="*50)
    try:
        print("Attempting PhoneDB scraper...")
        scraper = PhoneDBScraper()
        phone_name = "iPhone 15 Pro"
        result = scraper.scrape_phone_by_name(phone_name)
        if result:
            # PhoneDB succeeded — save and stop here.
            print(f"β PhoneDB: Successfully scraped {result['name']}")
            scraper.save_to_json(result, f"{phone_name.replace(' ', '_')}_phonedb_specs.json")
            return
        print("β PhoneDB scraper failed, trying GSMArena...")
    except Exception as e:
        print(f"β PhoneDB initialization failed: {str(e)}")
        print("π Switching to GSMArena scraper...")
    # Fallback path: GSMArena.
    test_alternative_scraper()
if __name__ == "__main__":
    # main()  # uncomment to run the combined PhoneDB -> GSMArena flow
    # Currently runs the GSMArena alternative test directly.
    test_alternative_scraper()