diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml deleted file mode 100644 index 666f1f33d6..0000000000 --- a/.github/workflows/development.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: Development pipeline - -on: - push: - paths-ignore: - - ".gitignore" - - "README.md" - - "docs/**" - pull_request: - paths-ignore: - - ".gitignore" - - "README.md" - - "docs/**" - -jobs: - test: - runs-on: ubuntu-latest - strategy: - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.9", "3.10"] - - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi - - name: Setup environment - run: | - mv config/settings.yaml.dist config/settings.yaml - - name: Lint with pylint - run: | - pip3 install pylint - pylint main.py module - # - name: Test with pytest - # run: | - # pip3 install pytest - # pytest tests diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000000..70f767025b --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,14 @@ +name: Linting + +on: + push: + paths-ignore: + - ".gitignore" + - "README.md" + - "docs/**" + +jobs: + build: + uses: unict-dmi/reusable-workflows/.github/workflows/python-lint.yml@main + with: + repo_ref: ${{ github.repository }} diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000000..24a9b69b7e --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,5 @@ +[settings] +profile = black +line_length = 88 +multi_line_output = 3 +include_trailing_comma = True \ No newline at end of file diff --git a/.pylintrc b/.pylintrc index f129f4f2ce..3b715953fa 100644 --- a/.pylintrc +++ b/.pylintrc @@ -91,7 +91,7 @@ recursive=no # When 
enabled, pylint would attempt to guess common misconfiguration and emit # user-friendly hints instead of false-positive error messages. -suggestion-mode=yes +; suggestion-mode=yes # Allow loading of arbitrary C extensions. Extensions are imported into the # active Python interpreter and may run arbitrary code. @@ -308,8 +308,8 @@ min-public-methods=2 [EXCEPTIONS] # Exceptions that will emit a warning when caught. -overgeneral-exceptions=BaseException, - Exception +overgeneral-exceptions=builtins.BaseException, + builtins.Exception [FORMAT] @@ -416,7 +416,13 @@ disable=raw-checker-failed, suppressed-message, useless-suppression, deprecated-pragma, - use-symbolic-message-instead + use-symbolic-message-instead, + missing-module-docstring, + missing-function-docstring, + missing-class-docstring, + missing-class-docstring, + too-many-locals + # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option diff --git a/main.py b/main.py index e25ad1c164..499734f8a4 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,9 @@ """Main module""" + import logging + from telegram.ext import Updater + from module.data import config_map from module.handlers import add_handlers, set_commands from module.jobs import add_jobs @@ -13,7 +16,9 @@ def setup_logging(logs_file: str) -> None: Args: logs_file: path to the log file """ - formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) logger = logging.getLogger() file_handler = logging.FileHandler(f"{logs_file}.log") @@ -34,8 +39,8 @@ def main() -> None: updater = Updater(config_map["token"]) set_commands(updater) - add_handlers(updater.dispatcher) - add_jobs(updater.job_queue) + add_handlers(updater.dispatcher) # type: ignore[has-type] + add_jobs(updater.job_queue) # type: ignore[has-type] updater.start_polling() updater.idle() 
diff --git a/module/data/__init__.py b/module/data/__init__.py index 12443c9ee1..68d63204eb 100644 --- a/module/data/__init__.py +++ b/module/data/__init__.py @@ -1,4 +1,5 @@ """Data module""" -from .constants import START_TEXT, CLEAR_LOGFILE_TEXT, DEFAULT_NOTICES_DATA + from .config import config_map +from .constants import CLEAR_LOGFILE_TEXT, DEFAULT_NOTICES_DATA, START_TEXT from .types import Config, GroupConfig, NoticeData, PageConfig diff --git a/module/data/config.py b/module/data/config.py index 3d11891f0d..fb9f6043c6 100644 --- a/module/data/config.py +++ b/module/data/config.py @@ -1,6 +1,9 @@ """Configuration map""" + import os + import yaml + from .types import Config @@ -14,7 +17,9 @@ def load_configurations(path: str = "config/") -> Config: Returns: dictionary containing the configuration data """ - with open(os.path.join(path, "settings.yaml"), "r", encoding="utf-8") as main_settings: + with open( + os.path.join(path, "settings.yaml"), "r", encoding="utf-8" + ) as main_settings: new_config = yaml.load(main_settings, Loader=yaml.SafeLoader) new_config["notices_groups"] = {} @@ -25,7 +30,9 @@ def load_configurations(path: str = "config/") -> Config: group_id = group_file_path.replace(".yaml", "") full_group_path = os.path.join(notices_groups_path, group_file_path) with open(full_group_path, "r", encoding="utf-8") as group_file: - new_config["notices_groups"][group_id] = yaml.load(group_file, Loader=yaml.SafeLoader) + new_config["notices_groups"][group_id] = yaml.load( + group_file, Loader=yaml.SafeLoader + ) return new_config diff --git a/module/data/constants.py b/module/data/constants.py index 99ddf97e5a..7072ab29ed 100644 --- a/module/data/constants.py +++ b/module/data/constants.py @@ -1,6 +1,6 @@ """Constant strings""" -from .types import NoticeData +from .types import NoticeData START_TEXT = ( "Benvenuto! 
Questo bot è stato realizzato dagli studenti di Informatica" diff --git a/module/data/types.py b/module/data/types.py index f476d018ff..bb70c91808 100644 --- a/module/data/types.py +++ b/module/data/types.py @@ -1,4 +1,5 @@ """Type definitions.""" + from typing import TypedDict diff --git a/module/handlers/__init__.py b/module/handlers/__init__.py index d074ab2fdd..f8b552f849 100644 --- a/module/handlers/__init__.py +++ b/module/handlers/__init__.py @@ -1,18 +1,20 @@ """Handlers module""" + from telegram import BotCommand from telegram.ext import ( - Updater, - Dispatcher, + CallbackQueryHandler, CommandHandler, - MessageHandler, + Dispatcher, Filters, - CallbackQueryHandler, + MessageHandler, + Updater, ) -from .start import start_cmd + from .chat_id import chat_id_cmd +from .clear_logfile import clear_logfile_cmd from .log import log_msg from .send_logfile import send_logfile_cmd -from .clear_logfile import clear_logfile_cmd +from .start import start_cmd def set_commands(updater: Updater) -> None: @@ -25,7 +27,7 @@ def set_commands(updater: Updater) -> None: BotCommand("start", "presentazione iniziale del bot"), BotCommand("chatid", "ottieni la chat id corrente"), ] - updater.bot.set_my_commands(commands=commands) + updater.bot.set_my_commands(commands=commands) # type: ignore[has-type] def add_handlers(dp: Dispatcher) -> None: diff --git a/module/handlers/chat_id.py b/module/handlers/chat_id.py index ee8b6c1f8f..53d3471c60 100644 --- a/module/handlers/chat_id.py +++ b/module/handlers/chat_id.py @@ -1,4 +1,5 @@ """/chatid command""" + from telegram import Update from telegram.ext import CallbackContext diff --git a/module/handlers/clear_logfile.py b/module/handlers/clear_logfile.py index 6eb4d67491..ae07755b0e 100644 --- a/module/handlers/clear_logfile.py +++ b/module/handlers/clear_logfile.py @@ -1,8 +1,11 @@ """/clear_logfile command""" + import logging + from telegram import Update from telegram.ext import CallbackContext -from module.data import config_map, 
CLEAR_LOGFILE_TEXT + +from module.data import CLEAR_LOGFILE_TEXT, config_map def clear_logfile_cmd(update: Update, context: CallbackContext) -> None: @@ -19,6 +22,10 @@ def clear_logfile_cmd(update: Update, context: CallbackContext) -> None: return logging.info("Clearing logfile...") - with open("logfile.log", "w", encoding="utf-8"): # overwrite the logfile with an empty file + with open( + "logfile.log", "w", encoding="utf-8" + ): # overwrite the logfile with an empty file pass - context.bot.send_message(chat_id=config_map["log_group_chatid"], text=CLEAR_LOGFILE_TEXT) + context.bot.send_message( + chat_id=config_map["log_group_chatid"], text=CLEAR_LOGFILE_TEXT + ) diff --git a/module/handlers/log.py b/module/handlers/log.py index 89f9e6cb18..a3ee95388b 100644 --- a/module/handlers/log.py +++ b/module/handlers/log.py @@ -1,4 +1,5 @@ """log handler""" + from telegram import Update from telegram.ext import CallbackContext diff --git a/module/handlers/send_logfile.py b/module/handlers/send_logfile.py index 774e721947..45b035c3d6 100644 --- a/module/handlers/send_logfile.py +++ b/module/handlers/send_logfile.py @@ -1,8 +1,11 @@ """/send_logfile command""" + import logging from datetime import datetime + from telegram import Update from telegram.ext import CallbackContext + from module.data import config_map diff --git a/module/handlers/start.py b/module/handlers/start.py index 6268c8654c..9e9ea22a27 100644 --- a/module/handlers/start.py +++ b/module/handlers/start.py @@ -1,6 +1,8 @@ """/start command""" + from telegram import Update from telegram.ext import CallbackContext + from module.data import START_TEXT diff --git a/module/jobs/__init__.py b/module/jobs/__init__.py index d5d57f61af..03058fd1fc 100644 --- a/module/jobs/__init__.py +++ b/module/jobs/__init__.py @@ -1,6 +1,9 @@ """Jobs module""" + from telegram.ext import JobQueue + from module.data import config_map + from .post_and_clear_log import post_and_clear_log_job from .update_notices import 
update_notices_job @@ -14,7 +17,11 @@ def add_jobs(job_queue: JobQueue) -> None: """ # logfile reset job_queue.run_repeating( - post_and_clear_log_job, interval=config_map["logfile_reset_interval_minutes"] * 60, first=5 + post_and_clear_log_job, + interval=config_map["logfile_reset_interval_minutes"] * 60, + first=5, ) # update tick - job_queue.run_repeating(update_notices_job, interval=config_map["update_interval"], first=5) + job_queue.run_repeating( + update_notices_job, interval=config_map["update_interval"], first=5 + ) diff --git a/module/jobs/post_and_clear_log.py b/module/jobs/post_and_clear_log.py index 1c3e5c45dd..381a676782 100644 --- a/module/jobs/post_and_clear_log.py +++ b/module/jobs/post_and_clear_log.py @@ -1,8 +1,11 @@ """Post and clear job""" + import logging from datetime import datetime + from telegram.ext import CallbackContext -from module.data import config_map, CLEAR_LOGFILE_TEXT + +from module.data import CLEAR_LOGFILE_TEXT, config_map def post_and_clear_log_job(context: CallbackContext) -> None: @@ -27,7 +30,11 @@ def post_and_clear_log_job(context: CallbackContext) -> None: logging.info("Deleting current logfile...") - with open("logfile.log", "w", encoding="utf-8"): # overwrite the logfile with an empty file + with open( + "logfile.log", "w", encoding="utf-8" + ): # overwrite the logfile with an empty file pass - context.bot.sendMessage(chat_id=config_map["log_group_chatid"], text=CLEAR_LOGFILE_TEXT) + context.bot.sendMessage( + chat_id=config_map["log_group_chatid"], text=CLEAR_LOGFILE_TEXT + ) diff --git a/module/jobs/update_notices.py b/module/jobs/update_notices.py index c12ed129fb..8de3910563 100644 --- a/module/jobs/update_notices.py +++ b/module/jobs/update_notices.py @@ -1,6 +1,9 @@ """Scraping job""" + import logging + from telegram.ext import CallbackContext + from module.data import config_map from module.scraping import scrape_group diff --git a/module/scraping/__init__.py b/module/scraping/__init__.py index 
bd38c664bc..d918fcdfc7 100644 --- a/module/scraping/__init__.py +++ b/module/scraping/__init__.py @@ -1,4 +1,5 @@ """Scraping module""" + from .notice import Notice -from .scraper_links import get_links from .scraper_group import scrape_group +from .scraper_links import get_links diff --git a/module/scraping/notice.py b/module/scraping/notice.py index 4ec9e91866..8bca15d273 100644 --- a/module/scraping/notice.py +++ b/module/scraping/notice.py @@ -53,25 +53,25 @@ def from_url(cls, label: str, url: str) -> "Notice | None": table_content += "\t".join(cols_text) + "\n" table.decompose() # remove table from content - title = cls.__get_title(cls, soup) + title = cls.__get_title(soup) content = soup.find("div", attrs={"class": "field-item even"}) - prof = cls.__get_prof(cls, soup) + prof = cls.__get_prof(soup) if title is not None and content is not None: - title = title.get_text() - content = content.get_text() + title_text = title.get_text() + content_text = content.get_text() - content = f"{content.strip()}\n{table_content}" + final_content = f"{content_text.strip()}\n{table_content}" if prof is not None: - title = f"[{prof}]\n{title}" + title_text = f"[{prof}]\n{title_text}" else: return None - title = f"\n{title}" + title_text = f"\n{title_text}" - return cls(label, title, content, url) + return cls(label, title_text, final_content, url) except ( requests.Timeout, requests.ConnectionError, @@ -83,7 +83,8 @@ def from_url(cls, label: str, url: str) -> "Notice | None": return None - def __get_prof(self, soup: bs4.BeautifulSoup) -> str | None: + @staticmethod + def __get_prof(soup: bs4.BeautifulSoup) -> str | None: """Returns the prof of the notice Args: soup: BeautifulSoup object of the page @@ -92,9 +93,12 @@ def __get_prof(self, soup: bs4.BeautifulSoup) -> str | None: """ goto_prof_text = "Vai alla scheda del prof. 
" prof = soup.find("a", text=lambda text: text and goto_prof_text in text) - return prof and prof.get_text().replace(goto_prof_text, "") + if isinstance(prof, bs4.Tag): + return prof.get_text().replace(goto_prof_text, "") + return None - def __get_title(self, soup: bs4.BeautifulSoup) -> bs4.BeautifulSoup | None: + @staticmethod + def __get_title(soup: bs4.BeautifulSoup) -> bs4.Tag | None: """Returns the title of the notice Args: soup: BeautifulSoup object of the page @@ -102,7 +106,10 @@ def __get_title(self, soup: bs4.BeautifulSoup) -> bs4.BeautifulSoup | None: the soup of the title """ title = soup.find("h1", attrs={"class": "page-title"}) - return title if title else soup.select_one("section#content h1") + if isinstance(title, bs4.Tag): + return title + result = soup.select_one("section#content h1") + return result if isinstance(result, bs4.Tag) else None @property def formatted_url(self) -> str: diff --git a/module/scraping/scraper_group.py b/module/scraping/scraper_group.py index 6c844fb125..e60d7df1ce 100644 --- a/module/scraping/scraper_group.py +++ b/module/scraping/scraper_group.py @@ -1,10 +1,14 @@ """Scrape groups""" + import logging import os import shutil + import yaml from telegram.ext import CallbackContext -from module.data import NoticeData, GroupConfig, DEFAULT_NOTICES_DATA, config_map + +from module.data import DEFAULT_NOTICES_DATA, GroupConfig, NoticeData, config_map + from .notice import Notice from .scraper_links import get_links @@ -24,7 +28,9 @@ def scrape_group(context: CallbackContext, group_key: str, group: GroupConfig) - logging.info("-- Page '%s'", page_key) # Generate page folder's path and subpaths - base_page_path = f"data/avvisi/{group_key.replace(' ', '_')}/{page_key.replace(' ', '_')}" + base_page_path = ( + f"data/avvisi/{group_key.replace(' ', '_')}/{page_key.replace(' ', '_')}" + ) data_file_path = f"{base_page_path}/notices_data.yaml" # Initialize folder and data file (if it doesn't exist) @@ -52,21 +58,28 @@ def 
scrape_group(context: CallbackContext, group_key: str, group: GroupConfig) - # If link has already been scraped # (implying that's invalid page or already posted notice), skip it - if link in notices_data["scraped_links"]: + if not isinstance(link, str) or link in notices_data["scraped_links"]: logging.info("Link is already present in the list") continue + # Type guard for mypy to ensure link is str + assert isinstance(link, str) + notice = Notice.from_url(page["label"], group["base_url"] + link) # If the notice is valid, # enqueue it to be sent in the channel or in an approval group if notice is not None: - logging.info("Link is valid and seems to contain a notice, spamming") + logging.info( + "Link is valid and seems to contain a notice, spamming" + ) notice.send(context, page["channels"]) else: logging.info("Link doesn't contain a valid notice") - context.bot.sendMessage(chat_id=config_map["log_group_chatid"], - text=f"Link doesn't contain a valid notice: {link}") + context.bot.sendMessage( + chat_id=config_map["log_group_chatid"], + text=f"Link doesn't contain a valid notice: {link}", + ) # Appends current link to scraped ones notices_data["scraped_links"].append(link) diff --git a/module/scraping/scraper_links.py b/module/scraping/scraper_links.py index 5d023a0a57..3c643a6a04 100644 --- a/module/scraping/scraper_links.py +++ b/module/scraping/scraper_links.py @@ -10,7 +10,7 @@ from module.data import config_map -def get_links(url: str) -> "list[str] | None": +def get_links(url: str) -> list[str | list[str] | None] | None: """Generates a list of links to the notices scraped from the page indicated by the url. 
Args: @@ -56,7 +56,7 @@ def get_links(url: str) -> "list[str] | None": link.get("href") for link in result if link.get("href") - and not any(exclude in link.get("href") for exclude in exclude_links) + and not any(exclude in str(link.get("href")) for exclude in exclude_links) ] return links diff --git a/requirements.txt b/requirements.txt index efc2191ede..fb90e6000f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ APScheduler==3.6.3 -astroid==2.12.12 beautifulsoup4==4.9.3 cachetools==4.2.2 certifi==2020.12.5 @@ -8,7 +7,6 @@ chardet==4.0.0 click==8.1.3 cryptography==3.4.7 decorator==5.0.5 -dill==0.3.6 idna==2.10 lazy-object-proxy==1.8.0 mccabe==0.7.0 @@ -25,5 +23,5 @@ tomli==2.0.1 tomlkit==0.11.6 tornado==6.1 tzlocal==2.1 -urllib3==1.26.4 +urllib3 wrapt==1.14.1 diff --git a/requirements_dev.txt b/requirements_dev.txt index e9a0ae21a5..ba76f1bf3e 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,5 +1,11 @@ black==22.10.0 isort==5.10.1 mypy-extensions==0.4.3 -pylint==2.15.5 +pylint==4.0.4 pytest +flake8 +mypy +typed-ast +types-PyYAML +types-beautifulsoup4 +types-requests diff --git a/tests/config_migration/test_config_migration.py b/tests/config_migration/test_config_migration.py index 6f4f0728ec..09c560d0bb 100644 --- a/tests/config_migration/test_config_migration.py +++ b/tests/config_migration/test_config_migration.py @@ -1,21 +1,22 @@ -import yaml, pytest +import pytest +import yaml -from module.config import load_configurations +from module.data.config import load_configurations -DATA_FOLDER = 'tests/config_migration/data' +DATA_FOLDER = "tests/config_migration/data" -@pytest.mark.parametrize("test_case_id", [ - "minimal", - "two_groups", - "disum", - "server_conf" -]) -def test_new_configuration_loading(test_case_id) -> None: - test_case_data_folder = f'{DATA_FOLDER}/{test_case_id}' - with open(f'{test_case_data_folder}/old/settings.yaml', 'r') as yaml_config: +@pytest.mark.parametrize( + "test_case_id", ["minimal", 
"two_groups", "disum", "server_conf"] +) +def test_new_configuration_loading(test_case_id: str) -> None: + test_case_data_folder = f"{DATA_FOLDER}/{test_case_id}" + + with open( + f"{test_case_data_folder}/old/settings.yaml", "r", encoding="utf-8" + ) as yaml_config: old_config_map = yaml.load(yaml_config, Loader=yaml.SafeLoader) - new_config_map = load_configurations(f'{test_case_data_folder}/new/') + new_config_map = load_configurations(f"{test_case_data_folder}/new/") - assert new_config_map == old_config_map \ No newline at end of file + assert new_config_map == old_config_map