from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
# source env/bin/activate
# Simple example: open the site, follow the "About" link, and print the text
# of the first <h2 class="anchored"> heading found in the page content.
with sync_playwright() as p:
    # headless=False opens a visible browser window; slow_mo=50 delays each
    # Playwright operation by 50 ms so the steps are easy to follow.
    browser = p.webkit.launch(headless=False, slow_mo=50)
    page = browser.new_page()
    page.goto("https://superkaka.se")
    page.locator("a:has-text(\"About\")").click()

    # Grab the inner HTML of the ".content" element and hand it to
    # BeautifulSoup for parsing.
    html = page.inner_html('.content')
    soup = BeautifulSoup(html, 'html.parser')
    # Debugging aid: uncomment to list every paragraph in the content.
    # print(soup.find_all("p"))
    stuff = soup.find('h2', {'class': 'anchored'}).text
    print(f'Here is what is under {stuff}')
    browser.close()
How to fetch information from a website in Python
python
webscraping
selenium
playwright
Is Playwright any better than Selenium?
Page is under construction
Simple example
Open a browser, create a new page, and go to a URL.
Super simple :)
Example with a login
Go to a specific page, log in, wait for the table to load, then scrape the table.
from playwright.sync_api import sync_playwright
import pandas as pd
# Example with a login: sign in to the Shiny app, open the data tab, wait for
# the table to render, then parse the table markup with pandas.
with sync_playwright() as p:
    # headless=False opens a visible browser window; slow_mo=50 delays each
    # Playwright operation by 50 ms so the steps are easy to follow.
    browser = p.webkit.launch(headless=False, slow_mo=50)
    page = browser.new_page()
    page.goto("https://appforiarteam.shinyapps.io/Shiny_Plotly/")

    # Fill in the login form and submit it.
    page.fill("input#userName", "test")
    page.fill("input#passwd", "test2")
    page.click("button[id=Login]")

    # Switch to the data tab.
    page.click("a[href='#shiny-tab-data_vis']")
    # We wait for the table to load by clicking on the table when it appears
    page.click("tr[role='row']")

    html = page.inner_html(".wrapper")
    # pd.read_html returns a list of DataFrames, one per <table> it finds.
    df = pd.read_html(html)
    print(df)
    browser.close()