# Xuliqin Practice1127-1

import matplotlib.pyplot as plt
import pandas as pd
import requests
from bs4 import BeautifulSoup
import textwrap
# Preview the first ten rows of the dplyr "storms" dataset straight from the
# Rdatasets mirror; the frame is only displayed, not kept.
storms_csv_url = "https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/storms.csv"
pd.read_csv(storms_csv_url, nrows=10)
rownames name year month day hour lat long status category wind pressure tropicalstorm_force_diameter hurricane_force_diameter
0 1 Amy 1975 6 27 0 27.5 -79.0 tropical depression NaN 25 1013 NaN NaN
1 2 Amy 1975 6 27 6 28.5 -79.0 tropical depression NaN 25 1013 NaN NaN
2 3 Amy 1975 6 27 12 29.5 -79.0 tropical depression NaN 25 1013 NaN NaN
3 4 Amy 1975 6 27 18 30.5 -79.0 tropical depression NaN 25 1013 NaN NaN
4 5 Amy 1975 6 28 0 31.5 -78.8 tropical depression NaN 25 1012 NaN NaN
5 6 Amy 1975 6 28 6 32.4 -78.7 tropical depression NaN 25 1012 NaN NaN
6 7 Amy 1975 6 28 12 33.3 -78.0 tropical depression NaN 25 1011 NaN NaN
7 8 Amy 1975 6 28 18 34.0 -77.0 tropical depression NaN 30 1006 NaN NaN
8 9 Amy 1975 6 29 0 34.4 -75.8 tropical storm NaN 35 1004 NaN NaN
9 10 Amy 1975 6 29 6 34.0 -74.8 tropical storm NaN 40 1002 NaN NaN
# Download the research page that the following cells scrape.
url = "http://aeturrell.com/research"
# requests never times out on its own, so give up after 30 seconds instead of
# hanging forever on an unresponsive server.
page = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx response rather than parsing an error page as if
# it were the real content.
page.raise_for_status()
# Peek at the start of the raw HTML to confirm the download worked.
page.text[:300]
'<!DOCTYPE html>\n<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>\n\n<meta charset="utf-8">\n<meta name="generator" content="quarto-1.6.39">\n\n<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">\n\n<meta name="author" content="Arthur Turrell">\n'
# Parse the downloaded HTML with Python's built-in parser so it can be searched.
soup = BeautifulSoup(page.text, "html.parser")
# Pretty-print the document and show a 500-character window from the middle
# of it, to get a feel for how the publication entries are marked up.
pretty_html = soup.prettify()
print(pretty_html[60000:60500])
TJDdmFjYW5jaWVzJTJDQ09WSUQtMTk=" data-index="1" data-listing-date-modified-sort="NaN" data-listing-date-sort="1651359600000" data-listing-file-modified-sort="1687564711698" data-listing-reading-time-sort="1" data-listing-word-count-sort="182">
         <div class="project-content listing-pub-info">
          <p>
           Draca, Mirko, Emma Duchini, Roland Rathelot, Arthur Turrell, and Giulia Vattuone. Revolution in Progress? The Rise of Remote Work in the UK.
           <i>
            Univers
# Collect every <p> element in the parsed page
all_paras = soup.find_all(name="p")
# Inspect a single paragraph (the second one) as a sanity check
all_paras[1]
<p>Blundell, Jack, Emma Duchini, Stefania Simion, and Arthur Turrell. "Pay transparency and gender equality." <i>American Economic Journal: Economic Policy</i> (2024). doi: <a href="https://www.aeaweb.org/articles?id=10.1257/pol.20220766&amp;from=f"><code>10.1257/pol.20220766</code></a></p>
# Same paragraph with the markup stripped, leaving only its text content
all_paras[1].get_text()
'Blundell, Jack, Emma Duchini, Stefania Simion, and Arthur Turrell. "Pay transparency and gender equality." American Economic Journal: Economic Policy (2024). doi: 10.1257/pol.20220766'
# Each publication lives in a <div> carrying both the "project-content" and
# "listing-pub-info" classes. A CSS selector matches on the individual classes,
# so it keeps working if the page reorders them or adds another class; passing
# the space-separated string to class_ only matches that exact attribute value.
project_divs = soup.select("div.project-content.listing-pub-info")
# Reduce each entry to its plain-text citation, trimming surrounding whitespace.
projects = [div.get_text().strip() for div in project_divs]
# Show the first four citations.
projects[:4]
['Blundell, Jack, Emma Duchini, Stefania Simion, and Arthur Turrell. "Pay transparency and gender equality." American Economic Journal: Economic Policy (2024). doi: 10.1257/pol.20220766',
 'Botta, Federico, Robin Lovelace, Laura Gilbert, and Arthur Turrell. "Packaging code and data for reproducible research: A case study of journey time statistics." Environment and Planning B: Urban Analytics and City Science (2024): 23998083241267331. doi: 10.1177/23998083241267331',
 'Kalamara, Eleni, Arthur Turrell, Chris Redl, George Kapetanios, and Sujit Kapadia. "Making text count: economic forecasting using newspaper text." Journal of Applied Econometrics 37, no. 5 (2022): 896-919. doi: 10.1002/jae.2907',
 'Turrell, A., Speigner, B., Copple, D., Djumalieva, J. and Thurgood, J., 2021. Is the UK’s productivity puzzle mostly driven by occupational mismatch? An analysis using big data on job vacancies. Labour Economics, 71, p.102013. doi: 10.1016/j.labeco.2021.102013']
# Pull HTML tables from the Simple English Wikipedia World Cup article,
# restricting the result to tables whose text matches "Sweden".
world_cup_url = "https://simple.wikipedia.org/wiki/FIFA_World_Cup"
df_list = pd.read_html(world_cup_url, match="Sweden")
# read_html always returns a list of dataframes; take the first entry
# (expected to be the only one for this match).
df = df_list[0]
# Display the top rows of the table.
df.head()
Years Hosts Winners Score Runner's-up Third place Score.1 Fourth place
0 1930 Details Uruguay Uruguay 4 - 2 Argentina United States [note 1] Yugoslavia
1 1934 Details Italy Italy 2 - 1 Czechoslovakia Germany 3 - 2 Austria
2 1938 Details France Italy 4 - 2 Hungary Brazil 4 - 2 Sweden
3 1950 Details Brazil Uruguay 2 - 1 Brazil Sweden [note 2] Spain
4 1954 Details Switzerland West Germany 3 - 2 Hungary Austria 3 - 1 Uruguay