regex_test.py 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. import re
  2. import json
  3. from typing import List, Dict
  4. def _parse_appointment_slots(html: str) -> List[Dict]:
  5. slots = []
  6. pattern = r'"availableAppointments\\":\s*(\[.*\]),\\"showFlexiAppointment'
  7. match = re.search(pattern, html, re.DOTALL)
  8. if match:
  9. json_str = match.group(1).replace(r'\"', '"')
  10. print(f'json_str={json_str}')
  11. data = json.loads(json_str)
  12. for day in data:
  13. d_str = day.get('day')
  14. for s in day.get('slots', []):
  15. labels = s.get('labels', [])
  16. lbl = ""
  17. stype = ""
  18. cost = ""
  19. if 'pta' in labels:
  20. lbl = 'pta'
  21. stype = "Prime"
  22. elif 'ptaw' in labels:
  23. lbl = 'ptaw'
  24. stype = "Prime Weekend"
  25. elif '' in labels:
  26. lbl = ''
  27. stype = "Standard"
  28. if lbl or not labels:
  29. slots.append({
  30. 'date': d_str,
  31. 'time': s.get('time'),
  32. 'label': lbl,
  33. 'type': stype,
  34. 'cost': cost
  35. })
  36. return slots
  37. else:
  38. print('Parsed appointment slot page, but not found availableAppointments')
  39. return slots
  40. f = open('../debug_pages/Tls_Query_Slot_Page_20260110_221803.html', 'r')
  41. html_content = f.read()
  42. f.close()
  43. slots = _parse_appointment_slots(html_content)
  44. print(slots)