"""Convert an HTML table read from stdin into CSV written to stdout.

The second ``<table>`` of the document is used.  The ``<th>`` cells of its
first row become the CSV header, preceded by an extra "Index" column.
Every following ``<tr>`` becomes one CSV row, numbered from 1.

Cells carrying a ``rowspan`` attribute are expanded: their text is repeated
in the same column of the rows they span, so every CSV row has one value per
header label.  Rows with too few cells are padded with empty strings; cells
beyond the number of header labels are dropped.
"""

import csv
import datetime
import sys

import requests
from bs4 import BeautifulSoup

# The csv writer emits its own "\r\n" row terminators.  A text stream that
# translates newlines (the default for stdout on Windows) would turn them
# into "\r\r\n", so newline translation is switched off where supported.
# On platforms whose line separator is "\n" this does not change the output.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(newline="")

soup = BeautifulSoup(sys.stdin, "html.parser")
table = soup.find_all("table")[1]  # index 1: the second table in the document
rows = table.find_all("tr")
the_date = ""  # not referenced anywhere in this part of the script
sheet = csv.writer(sys.stdout)

labels = [th.get_text().strip() for th in rows[0].find_all("th")]

# One carry-over slot per column: "value" is the text of a cell that spans
# several rows, "count" is how many further rows must still repeat it.
dups = [{"value": "", "count": 0} for _ in labels]

sheet.writerow(["Index"] + labels)

index = 0  # stays 0 when the table has no data rows
for index, row in enumerate(rows[1:], start=1):
    # Parse every <td> up front: keep only the text before the first
    # non-breaking space, and read the rowspan (1 when the attribute is absent).
    parsed = [
        (td.get_text().split("\xa0")[0].strip(),
         int(td.attrs.get("rowspan", "1")))
        for td in row.find_all("td")
    ]
    remaining = iter(parsed)

    res = []
    for slot in dups:
        if slot["count"]:
            # This column is still covered by a rowspan cell from an earlier
            # row, so no cell of the current row is consumed for it.
            res.append(slot["value"])
            slot["count"] -= 1
            continue

        # Take the next cell of this row; pad with an empty one if exhausted.
        value, rowspan = next(remaining, ("", 1))
        res.append(value)
        if rowspan > 1:
            # Remember the value for the following rowspan - 1 rows.
            slot["count"] = rowspan - 1
            slot["value"] = value

    sheet.writerow([index] + res)