from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
# source env/bin/activate
# Open superkaka.se, follow the "About" link and print the text of the first
# <h2 class="anchored"> heading found inside the page's `.content` element.
with sync_playwright() as p:
    # headless=False shows the browser window; slow_mo=50 slows every
    # Playwright operation down by 50 ms so the steps can be followed by eye.
    browser = p.webkit.launch(headless=False, slow_mo=50)
    try:
        page = browser.new_page()
        page.goto("https://superkaka.se")
        page.locator('a:has-text("About")').click()

        # Hand the rendered markup of `.content` over to BeautifulSoup.
        html = page.inner_html(".content")
        soup = BeautifulSoup(html, "html.parser")
        # While exploring the page, soup.find_all("p") lists every paragraph.

        heading = soup.find("h2", {"class": "anchored"})
        if heading is None:
            # find() returns None when nothing matches; report that instead
            # of failing with an AttributeError on `.text`.
            print('No <h2 class="anchored"> heading found inside .content')
        else:
            print(f'Here is what is under {heading.text}')
    finally:
        # Close the browser even when one of the steps above raised.
        browser.close()

# How to fetch information from a website in Python
python
webscraping
selenium
playwright
Is Playwright any better than Selenium?
Page is under construction
Simple example
Open a browser, create a new page, and go to a URL.
Super simple :)
Example with a login
Log in, go to a specific page, wait for the table to load, then scrape the table.
from playwright.sync_api import sync_playwright
import pandas as pd
from io import StringIO

# Log in to the Shiny app, open its `data_vis` tab and read the HTML tables
# inside the `.wrapper` element into pandas DataFrames.
with sync_playwright() as p:
    # headless=False shows the browser window; slow_mo=50 slows every
    # Playwright operation down by 50 ms so the steps can be followed by eye.
    browser = p.webkit.launch(headless=False, slow_mo=50)
    try:
        page = browser.new_page()
        page.goto("https://appforiarteam.shinyapps.io/Shiny_Plotly/")

        # Fill in the login form and submit it.
        page.fill("input#userName", "test")
        page.fill("input#passwd", "test2")
        page.click("button[id=Login]")

        page.click("a[href='#shiny-tab-data_vis']")

        # We wait for the table to load by clicking on the table when it
        # appears: click() does not return until a matching row can be clicked.
        page.click("tr[role='row']")

        html = page.inner_html(".wrapper")
        # read_html() returns a list with one DataFrame per <table> it finds.
        # The markup is wrapped in StringIO because passing a literal HTML
        # string to read_html() is deprecated in recent pandas releases.
        tables = pd.read_html(StringIO(html))
        print(tables)
    finally:
        # Close the browser even when one of the steps above raised.
        browser.close()