123456789101112131415161718192021222324252627282930313233343536373839404142434445 |
- from bs4 import BeautifulSoup
- import datetime
- import csv
- import sys
- import requests
-
def parse_rowspan(raw):
    """Return the rowspan attribute value *raw* as an int.

    Missing, empty or non-numeric values fall back to 1 so that a single
    malformed attribute does not abort the whole conversion.
    """
    try:
        return int(raw)
    except (TypeError, ValueError):
        return 1


def extract_cells(row):
    """Return a ``(text, rowspan)`` tuple for every ``<td>`` found in *row*."""
    return [
        (td.get_text().strip(), parse_rowspan(td.attrs.get("rowspan", "1")))
        for td in row.find_all("td")
    ]


def expand_rowspans(cell_rows, width):
    """Yield one flat list of *width* values per entry of *cell_rows*.

    *cell_rows* is an iterable of rows, each a sequence of
    ``(value, rowspan)`` tuples in document order.  A cell whose rowspan
    is greater than 1 is repeated in the same column on the following
    ``rowspan - 1`` rows; the cells actually present in those rows fill
    the remaining columns from left to right.  Columns with no cell left
    get an empty string, and cells beyond *width* are dropped.
    """
    # One slot per column: the value being carried down and how many
    # further rows it still has to cover.
    pending = [{"value": "", "count": 0} for _ in range(width)]

    for cells in cell_rows:
        remaining = iter(cells)
        flat = []
        for slot in pending:
            if slot["count"]:
                # Column is still covered by a cell from an earlier row.
                flat.append(slot["value"])
                slot["count"] -= 1
                continue
            value, rowspan = next(remaining, ("", 1))
            flat.append(value)
            if rowspan > 1:
                slot["value"] = value
                slot["count"] = rowspan - 1
        yield flat


def main():
    """Convert the second HTML table read from stdin into CSV on stdout.

    The ``<th>`` texts of the table's first row become the column labels,
    preceded by an ``Index`` column that numbers the data rows from 1.
    """
    soup = BeautifulSoup(sys.stdin, "html.parser")

    tables = soup.find_all("table")
    if len(tables) < 2:
        sys.exit("expected at least two <table> elements in the input")

    rows = tables[1].find_all("tr")
    if not rows:
        sys.exit("the second <table> contains no <tr> rows")

    labels = [th.get_text().strip() for th in rows[0].find_all("th")]

    sheet = csv.writer(sys.stdout)
    sheet.writerow(["Index"] + labels)

    cell_rows = (extract_cells(row) for row in rows[1:])
    for index, values in enumerate(expand_rowspans(cell_rows, len(labels)), start=1):
        sheet.writerow([index] + values)


if __name__ == "__main__":
    main()
|