Files
mdscrap/scraper/dynamic_scraper.py
Beyhan Oğur 9630a33ec1 first commit
2026-04-26 22:00:50 +03:00

50 lines
2.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import logging
from playwright.sync_api import sync_playwright
# Configure the root logger when this module is imported: INFO and above,
# each record rendered as "<timestamp> - <level name> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
class DynamicScraper:
def __init__(self, headless: bool = True):
self.headless = headless
def run_scraper(self, url: str) -> str | None:
"""Belirtilen URL'yi Playwright ile açar ve tam yüklenmiş HTML'i döndürür."""
try:
with sync_playwright() as p:
# Chromium tarayıcısını başlat
browser = p.chromium.launch(headless=self.headless)
context = browser.new_context(
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
page = context.new_page()
logging.info(f"Playwright ile sayfa açılıyor: {url}")
# networkidle: Ağ trafiği durana kadar bekle (JS ile yüklenen veriler için iyi bir seçenektir)
page.goto(url, wait_until="networkidle", timeout=30000)
# İsterseniz burada belirli bir elementin yüklenmesini bekleyebilirsiniz:
# page.wait_for_selector("h1", timeout=5000)
html_content = page.content()
browser.close()
logging.info(f"Sayfa başarıyla yüklendi: {url}")
return html_content
except Exception as e:
logging.error(f"Playwright ile sayfa çekilirken hata oluştu ({url}): {e}")
return None
def extract_title_with_playwright(self, url: str) -> str | None:
"""Sadece title'ı çekmek için kısa bir örnek."""
try:
with sync_playwright() as p:
browser = p.chromium.launch(headless=self.headless)
page = browser.new_page()
page.goto(url)
title = page.title()
browser.close()
return title
except Exception as e:
logging.error(f"Title alınamadı: {e}")
return None