import typing as t
import requests
from bs4 import BeautifulSoup
# One scraped table row: the item label and its displayed value, both as text.
_Entry = t.Dict[str, str]
# A scraped table: category heading -> the rows listed under that heading.
Data = t.Dict[str, t.List[_Entry]]
class Numbeo:
    """Scraper for the Numbeo cost-of-living pages of one country.

    Pages are fetched lazily, on first access, and every parsed result is
    memoised on the instance so repeated access does not hit the network
    again.
    """

    _COUNTRY_URL = 'https://www.numbeo.com/cost-of-living/country_result.jsp'
    _CITY_URL = 'https://www.numbeo.com/cost-of-living/city_result.jsp'
    # Seconds to wait for the server; requests waits indefinitely by default.
    _TIMEOUT = 30

    def __init__(self, country: str = 'China') -> None:
        """Remember ``country``; nothing is downloaded until first access."""
        self._country = country
        self._soup: t.Optional[BeautifulSoup] = None
        self._data: t.Optional[Data] = None
        self._cities: t.Optional[t.List[str]] = None
        self._cache: t.Dict[str, Data] = {}

    @property
    def soup(self) -> BeautifulSoup:
        """Parsed country page, downloaded on first access."""
        if self._soup is None:
            self._soup = self._load_soup()
        return self._soup

    @property
    def data(self) -> Data:
        """Table parsed from the country page, computed on first access."""
        if self._data is None:
            self._data = self._load_data(self.soup)
        return self._data

    @property
    def cities(self) -> t.List[str]:
        """City names offered by the country page's ``city`` selector."""
        if self._cities is None:
            self._cities = self._load_cities(self.soup)
        return self._cities

    @property
    def cache(self) -> t.Dict[str, Data]:
        """Per-city results fetched so far (the live dict, not a copy)."""
        return self._cache

    def city(self, name: str) -> Data:
        """Return the table for city ``name``, fetching it at most once.

        Args:
            name: Value sent as the ``city`` query parameter.

        Raises:
            requests.RequestException: The download failed or timed out, or
                the server answered with an HTTP error status. Nothing is
                cached in that case.
        """
        if name not in self._cache:
            soup = self._fetch(
                self._CITY_URL,
                {'country': self._country, 'city': name},
            )
            self._cache[name] = self._load_data(soup)
        return self._cache[name]

    def _fetch(self, url: str, params: t.Dict[str, str]) -> BeautifulSoup:
        """Download ``url`` with query ``params`` and parse the body as HTML."""
        response = requests.get(url, params=params, timeout=self._TIMEOUT)
        # Fail loudly on 4xx/5xx instead of parsing an error page as data.
        response.raise_for_status()
        return BeautifulSoup(response.content, 'html.parser')

    def _load_soup(self) -> BeautifulSoup:
        """Download and parse the country page."""
        return self._fetch(self._COUNTRY_URL, {'country': self._country})

    def _load_data(self, soup: BeautifulSoup) -> Data:
        """Extract the categorised name/value rows from a parsed page.

        A row containing a ``<th>`` starts a new category; every following
        row contributes its first two ``<td>`` cells as ``name`` and
        ``value``. Returns an empty dict when the page has no data table.
        """
        data: Data = {}
        table = soup.find(class_='data_wide_table new_bar_table')
        if table is None:
            # No table on the page (e.g. no data for this place).
            return data

        entries: t.Optional[t.List[t.Dict[str, str]]] = None
        for row in table.find_all('tr'):
            head = row.find('th')
            if head is not None:
                entries = []
                data[self._simplify(head.text)] = entries
                continue
            cells = row.find_all('td')
            # Skip rows that precede any heading or that lack a name/value
            # pair rather than failing the whole page on one odd row.
            if entries is None or len(cells) < 2:
                continue
            name, value = cells[0], cells[1]
            entries.append({
                'name': self._simplify(name.text),
                'value': self._simplify(value.text),
            })
        return data

    def _load_cities(self, soup: BeautifulSoup) -> t.List[str]:
        """Collect the non-empty ``value`` attributes of the city options.

        Returns an empty list when the page has no element with id ``city``.
        """
        select = soup.find(id='city')
        if select is None:
            return []
        values = (option.get('value') for option in select.find_all('option'))
        # Empty or missing values (such as a placeholder entry) are dropped.
        return [value for value in values if value]

    def _simplify(self, text: str) -> str:
        """Remove non-breaking spaces and surrounding whitespace."""
        return text.replace('\xa0', '').strip()
if __name__ == '__main__':
    import json

    # Scrape every city listed for the country and dump the result as JSON.
    numbeo = Numbeo('China')
    data = {
        name: numbeo.city(name)
        for name in numbeo.cities
    }
    # ensure_ascii=False writes non-ASCII text verbatim, so the file encoding
    # is pinned to UTF-8; the platform default may be unable to encode it.
    with open('numbeo.json', 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False)