See More

"""Pydantic models for EuroPython speaker, session and schedule data."""

from __future__ import annotations

import re
from datetime import date, datetime
from urllib.parse import quote

from pydantic import BaseModel, Field, computed_field, field_validator, model_validator

from src.config import Config
from src.misc import EventType, Room, SpeakerQuestion, SubmissionQuestion
from src.models.pretalx import PretalxAnswer

# Invisible Unicode formatting characters that tend to sneak into copy-pasted
# handles: LTR/RTL marks, directional embeddings/overrides and isolates.
_INVISIBLE_CHARS_RE = re.compile("[\u200e\u200f\u202a-\u202e\u2066-\u2069]")


class EuroPythonSpeaker(BaseModel):
    """
    Model for EuroPython speaker data, transformed from Pretalx data
    """

    code: str
    name: str
    biography: str | None = None
    avatar: str
    slug: str
    # Raw answers are consumed by ``extract_answers`` and excluded from dumps.
    answers: list[PretalxAnswer] = Field(..., exclude=True)
    submissions: list[str]

    # Extracted
    affiliation: str | None = None
    homepage: str | None = None
    twitter_url: str | None = None
    mastodon_url: str | None = None
    linkedin_url: str | None = None
    bluesky_url: str | None = None
    gitx_url: str | None = None

    @computed_field
    def website_url(self) -> str:
        """
        Public speaker page URL, built from the second dash-separated part of
        ``Config.event`` (presumably the edition/year -- confirm) and the slug.
        """
        return (
            f"https://ep{Config.event.split('-')[1]}.europython.eu/speaker/{self.slug}"
        )

    @model_validator(mode="before")
    @classmethod
    def extract_answers(cls, values) -> dict:
        """
        Populate the "extracted" fields from the raw ``answers`` list.

        Each answer is matched against the ``SpeakerQuestion`` constants; the
        social answers are normalised into profile URLs (or ``None`` when the
        answer cannot be interpreted). ``values`` is updated in place and
        returned.
        """
        answers = [PretalxAnswer.model_validate(ans) for ans in values["answers"]]

        for answer in answers:
            if answer.question_text == SpeakerQuestion.affiliation:
                values["affiliation"] = answer.answer_text

            if answer.question_text == SpeakerQuestion.homepage:
                values["homepage"] = answer.answer_text

            if answer.question_text == SpeakerQuestion.twitter:
                values["twitter_url"] = cls.extract_twitter_url(answer.answer_text)

            if answer.question_text == SpeakerQuestion.mastodon:
                values["mastodon_url"] = cls.extract_mastodon_url(answer.answer_text)

            if answer.question_text == SpeakerQuestion.bluesky:
                values["bluesky_url"] = cls.extract_bluesky_url(answer.answer_text)

            if answer.question_text == SpeakerQuestion.linkedin:
                values["linkedin_url"] = cls.extract_linkedin_url(answer.answer_text)

            if answer.question_text == SpeakerQuestion.gitx:
                values["gitx_url"] = cls.extract_gitx_url(answer.answer_text)

        return values

    @staticmethod
    def extract_twitter_url(text: str) -> str | None:
        """
        Extracts a Twitter profile URL from the given text.
        Cleans the input and handles following formats:
        - @username
        - username
        - twitter.com/username
        - x.com/username

        Returns ``https://x.com/<username>``, or ``None`` if the input does
        not match any supported format.
        """
        cleaned = EuroPythonSpeaker._clean_social_input(text)
        if cleaned is None:
            print(f"Invalid Twitter URL: {text}")
            return None

        # https://twitter.com/username (username max 15 chars)
        match = re.match(r"^(twitter\.com|x\.com)/([\w]{1,15})$", cleaned)
        if match:
            _, username = match.groups()
            return f"https://x.com/{username}"

        # only username
        if re.match(r"^[\w]{1,15}$", cleaned):
            return f"https://x.com/{cleaned}"

        print(f"Invalid Twitter URL: {cleaned}")
        return None

    @staticmethod
    def extract_mastodon_url(text: str) -> str | None:
        """
        Extracts a Mastodon profile URL from the given text.
        Supports formats like:
        - @username@instance
        - username@instance
        - instance/@username
        - instance/@username@instance (with redirect)

        Returns: https://<instance>/@<username>, or ``None`` if the input
        cannot be interpreted.
        """
        cleaned = EuroPythonSpeaker._clean_social_input(text)
        if cleaned is None:
            print(f"Invalid Mastodon URL: {text}")
            return None

        # instance/@username
        match = re.match(r"^([\w\.-]+)/@([\w\.-]+)$", cleaned)
        if match:
            instance, username = match.groups()
            return f"https://{instance}/@{username}"

        parts = cleaned.split("@")
        if len(parts) == 3:
            # instance/@username@instance -- the trailing instance wins
            _, username, instance = parts
        elif len(parts) == 2:
            # username@instance
            username, instance = parts
        else:
            print(f"Invalid Mastodon URL: {cleaned}")
            return None

        if username and instance:
            return f"https://{instance}/@{username}"

        print(f"Invalid Mastodon URL: {cleaned}")
        return None

    @staticmethod
    def extract_linkedin_url(text: str) -> str | None:
        """
        Extracts a LinkedIn personal profile URL from the given text.
        Cleans the input and handles formats like:
        - username
        - linkedin.com/in/username
        - @username
        - tr.linkedin.com/in/username (country subdomains)

        Returns the normalised ``https://`` URL, or ``None`` if the result is
        not a ``linkedin.com/in/...`` profile URL.
        """
        cleaned = EuroPythonSpeaker._clean_social_input(text)
        if cleaned is None:
            print(f"Invalid LinkedIn URL: {text}")
            return None

        if cleaned.startswith("in/"):
            linkedin_url = f"https://linkedin.com/{cleaned}"
        elif "." not in cleaned:
            # No dot means no host part, so treat the input as a bare username.
            linkedin_url = f"https://linkedin.com/in/{cleaned}"
        else:
            linkedin_url = f"https://{cleaned}"

        if not re.match(
            r"^https://([\w-]+\.)?linkedin\.com/in/(?:[\w\-]|%[0-9A-Fa-f]{2})+(?:/[\w\-]+)*$",
            linkedin_url,
        ):
            print(f"Invalid LinkedIn URL: {linkedin_url}")
            return None

        return linkedin_url

    @staticmethod
    def extract_bluesky_url(text: str) -> str | None:
        """
        Extracts a Bluesky profile URL from the given text.
        Cleans the input and handles formats like:
        - username
        - bsky.app/profile/username
        - bsky/username
        - username.dev
        - @username
        - username.bsky.social

        Returns ``https://bsky.app/profile/<handle>``, or ``None`` if the
        resulting handle is not a dotted name.
        """
        cleaned = EuroPythonSpeaker._clean_social_input(text)
        if cleaned is None:
            print(f"Invalid Bluesky URL: {text}")
            return None

        for marker in ("bsky.app/profile/", "bsky/"):
            if marker in cleaned:
                cleaned = cleaned.split(marker, 1)[1]
                break
        else:
            # No known marker: keep only the last path segment.
            cleaned = cleaned.rsplit("/", 1)[-1]

        # Bare usernames live under the default bsky.social domain.
        if "." not in cleaned:
            cleaned += ".bsky.social"

        bluesky_url = f"https://bsky.app/profile/{cleaned}"

        if not re.match(r"^https://bsky\.app/profile/[\w\.-]+\.[\w\.-]+$", bluesky_url):
            print(f"Invalid Bluesky URL: {bluesky_url}")
            return None

        return bluesky_url

    @staticmethod
    def extract_gitx_url(text: str) -> str | None:
        """
        Extracts a GitHub/GitLab URL from the given text.
        Cleans the input and handles formats like:
        - username
        - github.com/username
        - gitlab.com/username
        - @username

        Bare usernames are assumed to be GitHub accounts. Returns ``None`` if
        the input matches none of the formats.
        """
        cleaned = EuroPythonSpeaker._clean_social_input(text)
        if cleaned is None:
            print(f"Invalid GitHub/GitLab URL: {text}")
            return None

        if cleaned.startswith(("github.com/", "gitlab.com/")):
            return f"https://{cleaned}"

        if re.match(r"^[\w-]+$", cleaned):  # assume github.com
            return f"https://github.com/{cleaned}"

        print(f"Invalid GitHub/GitLab URL: {cleaned}")
        return None

    @staticmethod
    def _is_blank_or_na(text: str) -> bool:
        """
        Check if the text is empty, whitespace-only, or equals "N/A" or "-"
        (case-insensitive, ignoring surrounding whitespace)
        """
        if not text:
            return True
        stripped = text.strip()
        return not stripped or stripped.lower() in {"n/a", "-"}

    @staticmethod
    def _clean_social_input(text: str) -> str | None:
        """
        Cleans the input string for social media URLs.

        Returns None if the input is blank or "N/A". Otherwise, in order:
        removes invisible Unicode control characters, removes a label prefix
        like "LinkedIn: " or "GH: ", removes parameters like "?something=true"
        and anything after a comma, removes trailing slashes, removes
        "http://" or "https://", removes a "www." prefix, removes a leading
        "@", percent-encodes the text if it contains non-ASCII characters,
        and lower-cases the result. Returns None if nothing is left.
        """
        if EuroPythonSpeaker._is_blank_or_na(text):
            print(f"Blank or N/A input: {text}")
            return None

        # Drop invisible marks first: str.strip() does not treat them as
        # whitespace, so a leading/trailing mark would otherwise shield the
        # scheme, "www.", "@" or trailing slash from the steps below.
        text = _INVISIBLE_CHARS_RE.sub("", text).strip()

        # Remove any text prefix like "LinkedIn: " or "GH: " -- split on the
        # ": " separator itself so multi-word labels are removed entirely.
        if ": " in text:
            text = text.split(": ", 1)[1].strip()

        # Remove query strings and trailing commas or slashes
        text = text.split("?", 1)[0]
        text = text.split(",", 1)[0]
        text = text.rstrip("/")

        # Remove URL schemes
        if text.startswith("https://"):
            text = text[8:]
        elif text.startswith("http://"):
            text = text[7:]

        # Remove "www." prefix
        if text.startswith("www."):
            text = text[4:]

        # Remove leading @
        if text.startswith("@"):
            text = text[1:]

        # Percent-encode if needed (e.g., non-ASCII chars)
        if not text.isascii():
            text = quote(text, safe="@/-_.+~#=:")

        return text.lower() if text else None


class EuroPythonSession(BaseModel):
    """
    Model for EuroPython session data, transformed from Pretalx data
    """

    code: str
    title: str
    speakers: list[str]
    session_type: str
    slug: str
    track: str | None = None
    abstract: str = ""
    tweet: str = ""
    duration: str = ""
    level: str = ""
    delivery: str = ""
    resources: list[dict[str, str | None]] | None = None
    room: str | None = None
    start: datetime | None = None
    end: datetime | None = None
    # Raw answers are consumed by ``extract_answers`` and excluded from dumps.
    answers: list[PretalxAnswer] = Field(..., exclude=True)
    sessions_in_parallel: list[str] | None = None
    sessions_after: list[str] | None = None
    sessions_before: list[str] | None = None
    next_session: str | None = None
    prev_session: str | None = None
    slot_count: int = Field(..., exclude=True)
    youtube_url: str | None = None

    @field_validator("room", mode="before")
    @classmethod
    def handle_poster_room(cls, value) -> str | None:
        """
        Rewrite any room whose name contains "Main Hall" to "Exhibit Hall";
        every other value (including ``None``) passes through unchanged.
        """
        if value and "Main Hall" in value:
            return "Exhibit Hall"
        return value

    @computed_field
    def website_url(self) -> str:
        """
        Public session page URL, built from the second dash-separated part of
        ``Config.event`` (presumably the edition/year -- confirm) and the slug.
        """
        return (
            f"https://ep{Config.event.split('-')[1]}.europython.eu/session/{self.slug}"
        )

    @model_validator(mode="before")
    @classmethod
    def extract_answers(cls, values) -> dict:
        """
        Populate ``tweet``, ``delivery`` and ``level`` from the raw
        ``answers`` list. ``values`` is updated in place and returned.
        """
        answers = [PretalxAnswer.model_validate(ans) for ans in values["answers"]]

        for answer in answers:
            # TODO if we need any other questions
            if answer.question_text == SubmissionQuestion.tweet:
                values["tweet"] = answer.answer_text

            if answer.question_text == SubmissionQuestion.delivery:
                # Any answer other than the two in-person variants is
                # recorded as "remote".
                if answer.answer_text == "in-person at the conference":
                    values["delivery"] = "in-person"
                elif answer.answer_text == "in-person or remote":
                    values["delivery"] = "in-person or remote"
                else:
                    values["delivery"] = "remote"

            if answer.question_text == SubmissionQuestion.level:
                values["level"] = answer.answer_text.lower()

        return values


class EuroPythonScheduleSpeaker(BaseModel):
    """
    Model for EuroPython schedule speaker data
    """

    code: str
    name: str
    avatar: str
    slug: str
    website_url: str


class EuroPythonScheduleSession(BaseModel):
    """
    Model for EuroPython schedule session data
    """

    event_type: EventType = EventType.SESSION
    code: str
    slug: str
    title: str
    session_type: str
    speakers: list[EuroPythonScheduleSpeaker]
    track: str | None
    tweet: str
    level: str
    # Only used to derive ``duration``; excluded from dumps.
    total_duration: int = Field(..., exclude=True)
    rooms: list[Room]
    start: datetime
    # Only used to derive ``duration``; excluded from dumps.
    slot_count: int = Field(..., exclude=True)
    website_url: str

    @computed_field
    def duration(self) -> int:
        """Duration of a single slot: total duration floor-divided by slot count."""
        return self.total_duration // self.slot_count


class EuroPythonScheduleBreak(BaseModel):
    """
    Model for EuroPython schedule break data
    """

    event_type: EventType = EventType.BREAK
    title: str
    duration: int
    rooms: list[Room]
    start: datetime


class DaySchedule(BaseModel):
    """
    Rooms in use and events taking place on a single day
    """

    rooms: list[Room]
    events: list[EuroPythonScheduleSession | EuroPythonScheduleBreak]


class Schedule(BaseModel):
    """
    Full schedule: one ``DaySchedule`` per calendar date
    """

    days: dict[date, DaySchedule]

    @classmethod
    def from_events(
        cls, events: list[EuroPythonScheduleSession | EuroPythonScheduleBreak]
    ) -> Schedule:
        """
        Group events by the calendar date of their ``start`` and collect the
        distinct rooms used on each day, in first-seen order.

        Any event whose title contains "Registration & Welcome" has its
        ``rooms`` replaced (in place) with all rooms of its day.
        """
        day_dict: dict[date, dict] = {}

        for event in events:
            day = day_dict.setdefault(event.start.date(), {"rooms": [], "events": []})
            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # the room list is deterministic (a set would not be).
            day["rooms"] = list(dict.fromkeys(day["rooms"] + event.rooms))
            day["events"].append(event)

        # Registration session should cover all rooms
        for day in day_dict.values():
            for event in day["events"]:
                if "Registration & Welcome" in event.title:
                    event.rooms = list(day["rooms"])

        day_schedule_dict = {k: DaySchedule(**v) for k, v in day_dict.items()}
        return cls(days=day_schedule_dict)