Spaces:
Paused
Paused
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, Response
|
| 2 |
+
import requests
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
# Flask application instance that the route decorators in this module attach to.
app = Flask(__name__)
|
| 7 |
+
|
| 8 |
+
def create_proxy_url(original_url, base_proxy_url):
    """Return a proxy URL that carries ``original_url`` in its ``url`` parameter.

    Args:
        original_url: The URL to wrap. Surrounding whitespace is ignored and a
            protocol-relative URL (``//host/path``) is promoted to ``https``.
        base_proxy_url: Address of the proxy endpoint; ``?url=...`` is
            appended to it verbatim.

    Returns:
        ``base_proxy_url`` followed by ``?url=`` and the percent-encoded
        target URL.
    """
    # Imported locally so the function does not depend on the incidental
    # re-export of ``quote`` through ``requests.utils``.
    from urllib.parse import quote

    # HTML attribute values may carry stray whitespace around the URL; strip it
    # first so the protocol-relative check below sees the real prefix.
    cleaned_url = original_url.strip()
    if cleaned_url.startswith('//'):
        cleaned_url = 'https://' + cleaned_url[2:]

    # Percent-encode ``?``, ``&``, ``=`` and ``#`` so the target URL travels as
    # a single query value instead of leaking into the proxy's own query.
    return f"{base_proxy_url}?url={quote(cleaned_url)}"
|
| 14 |
+
|
| 15 |
+
# Landing page served when no ``url`` parameter is supplied. The placeholder is
# written with HTML entities so the browser displays it instead of parsing it
# as an unknown tag.
_LANDING_PAGE_HTML = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Proxy Service</title>
<style>
body { font-family: sans-serif; background-color: #121212; color: #e0e0e0; display: flex; justify-content: center; align-items: center; height: 100vh; margin: 0; }
.container { text-align: center; padding: 2rem; background-color: #1e1e1e; border-radius: 8px; }
code { background-color: #2d2d2d; padding: 0.2rem 0.4rem; border-radius: 4px; }
</style>
</head>
<body>
<div class="container">
<h1>Proxy Service is Active</h1>
<p>Use this service by appending <code>?url=&lt;website_url&gt;</code> to the URL.</p>
</div>
</body>
</html>
"""

# User-Agent sent upstream when the incoming request does not provide one.
_DEFAULT_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
)

# Upstream headers that must not be relayed:
# - content-security-policy / x-frame-options would block embedding;
# - content-encoding / content-length / transfer-encoding / connection describe
#   the upstream wire format, which no longer matches the decoded or rewritten
#   body this proxy sends.
_EXCLUDED_HEADERS = frozenset({
    'content-security-policy',
    'x-frame-options',
    'content-encoding',
    'content-length',
    'transfer-encoding',
    'connection',
})

# (tag, attribute) pairs whose URLs are rewritten to go through the proxy.
_REWRITE_TARGETS = (('a', 'href'), ('link', 'href'), ('script', 'src'), ('img', 'src'))

# (connect, read) timeout in seconds so a stalled upstream cannot hang a worker.
_UPSTREAM_TIMEOUT = (5, 30)


@app.route('/')
def proxy():
    """Fetch the page named by the ``url`` query parameter and relay it.

    Without a ``url`` parameter a small landing page is returned. HTML
    responses are parsed, given a ``<base>`` tag, and have their ``a``,
    ``link``, ``script`` and ``img`` URLs rewritten to point back at this
    proxy. Any other content type is streamed through in 1 KiB chunks.

    Returns:
        A Flask response: the landing page (200), the relayed upstream
        response with its original status code, or a plain-text error (500)
        when the upstream request fails.
    """
    target_url = request.args.get('url')
    if not target_url:
        # Simple landing page for the proxy itself
        return _LANDING_PAGE_HTML, 200

    # NOTE(review): target_url is fully user-controlled, so this endpoint can be
    # pointed at internal addresses (SSRF). Consider an allow/deny list of hosts.

    # Use a session to handle cookies properly
    session = requests.Session()
    resp = None
    handed_off = False  # True once the streaming response owns resp/session
    try:
        session.headers.update({
            'User-Agent': request.headers.get('User-Agent', _DEFAULT_USER_AGENT)
        })

        # Make the request to the target URL
        resp = session.get(target_url, stream=True, timeout=_UPSTREAM_TIMEOUT)

        # Get the content type to check if it's HTML
        content_type = resp.headers.get('Content-Type', '').lower()

        headers = [
            (name, value)
            for name, value in resp.raw.headers.items()
            if name.lower() not in _EXCLUDED_HEADERS
        ]

        if 'text/html' in content_type:
            # SPACE_HOST is read from the environment; fall back to the host of
            # the incoming request so links stay valid when it is not set.
            space_host = os.environ.get('SPACE_HOST')
            proxy_base_url = f"https://{space_host}/" if space_host else request.host_url

            # Resolve relative links against the final URL after redirects.
            page_url = resp.url or target_url

            soup = BeautifulSoup(resp.content, 'html.parser')
            base_tag = soup.new_tag('base', href=page_url)
            # Fragments without a <head> would otherwise raise AttributeError.
            if soup.head is not None:
                soup.head.insert(0, base_tag)
            else:
                soup.insert(0, base_tag)

            # Rewrite all links and resource URLs to go through the proxy
            for tag_name, attr in _REWRITE_TARGETS:
                for node in soup.find_all(tag_name, **{attr: True}):
                    try:
                        # Make relative URLs absolute before proxying
                        absolute_url = requests.compat.urljoin(page_url, node[attr].strip())
                    except ValueError:
                        # Malformed URL in the page; leave the attribute alone.
                        continue
                    # javascript:, mailto:, data: etc. cannot be fetched by the
                    # proxy, so they are left untouched.
                    if not absolute_url.lower().startswith(('http://', 'https://')):
                        continue
                    node[attr] = create_proxy_url(absolute_url, proxy_base_url)

            # The rewritten document is sent as UTF-8, so the upstream
            # Content-Type (and its charset) is replaced rather than forwarded.
            headers = [(name, value) for name, value in headers if name.lower() != 'content-type']
            return Response(
                str(soup),
                resp.status_code,
                headers,
                content_type='text/html; charset=utf-8',
            )

        # For non-HTML content (images, CSS, JS), stream it directly
        streamed = Response(resp.iter_content(chunk_size=1024), resp.status_code, headers)
        # The body is consumed after this function returns, so release the
        # upstream connection only when the response itself is closed.
        streamed.call_on_close(resp.close)
        streamed.call_on_close(session.close)
        handed_off = True
        return streamed

    except requests.exceptions.RequestException as e:
        # Plain text: the exception message can echo the user-supplied URL and
        # must not be interpreted as HTML by the browser.
        return Response(f"Error fetching URL: {e}", status=500, mimetype='text/plain')
    finally:
        if not handed_off:
            if resp is not None:
                resp.close()
            session.close()
|
| 84 |
+
|
| 85 |
+
if __name__ == '__main__':
    # Listen on every interface; the port comes from PORT, defaulting to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=listen_port)
|