From 83e012a4fc66563893a4264c22a6b15a07a3a495 Mon Sep 17 00:00:00 2001 From: Wil T Date: Fri, 11 Apr 2025 22:15:43 -0400 Subject: [PATCH] Ci updates (#85) * remove tox and travis * add dependabot * update version numbers * updated ci workflow * ruff fixes * support python 3.13 patches * fix dependency install commands * add pypi badges * add dev requirements file * remove pip-install and mypy jobs * cleanup workflow commands * read-all workflow permissions * better workflow triggering * more permissive requirements * install requirements-dev.txt in workflow * don't catch overly broad Exception in add cookie * add codeql workflow * add dependency review workflow * update gitignore * update to project config in pyproject.toml * use 160 line length for ruff * add pre-commit config * updated ci workflow * allow sudo in pytest step (for headless) * apply pyupgrade fixes * add allowed-endpoints to pytest harden-runner * set harden runner to audit mode for pytest * rename workflow to python.yaml * faster pytests with xdist * fix allowed-endpoints in coverage job * try block in pytest harden runner again * run on pull requests against any branch * fix pytest-xdist not being used --- .github/dependabot.yml | 16 ++ .github/workflows/codeql.yml | 50 ++++ .github/workflows/coverage.yml | 38 +++ .github/workflows/dependency-review.yml | 33 +++ .github/workflows/python.yml | 315 ++++++++++++++++++++++ .gitignore | 78 +++++- .pre-commit-config.yaml | 31 +++ .travis.yml | 38 --- MANIFEST.in | 5 - README.md | 5 +- pyproject.toml | 96 +++++-- requestium/requestium.py | 155 +++++------ requirements.txt | 4 - setup.py | 38 --- tests/test_ensure_elements_deprecation.py | 15 +- tests/test_requestium.py | 18 +- tox.ini | 14 - 17 files changed, 720 insertions(+), 229 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/codeql.yml create mode 100644 .github/workflows/coverage.yml create mode 100644 .github/workflows/dependency-review.yml create mode 100644 .github/workflows/python.yml create mode 100644 .pre-commit-config.yaml delete mode 100644 .travis.yml delete mode 100644 MANIFEST.in delete mode 100644 requirements.txt delete mode 100644 setup.py delete mode 100644 tox.ini diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..d21cf00 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,16 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..44decc1 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,50 @@ +name: CodeQL analysis + +on: + push: + branches: + - main + - master + pull_request: + schedule: + - cron: 0 0 * * 1 + workflow_dispatch: + +permissions: read-all + +jobs: + analyze: + name: CodeQL analysis + + permissions: + security-events: write + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + language: + - python + + steps: + - uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf + with: + disable-sudo: true + egress-policy: block + allowed-endpoints: > + api.github.com:443 + github.com:443 + objects.githubusercontent.com:443 + uploads.github.com:443 + + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + + - uses: github/codeql-action/init@1b549b9259bda1cb5ddde3b41741a82a2d15a841 + with: + languages: ${{ matrix.language }} + + - uses: github/codeql-action/autobuild@1b549b9259bda1cb5ddde3b41741a82a2d15a841 + + - uses: github/codeql-action/analyze@1b549b9259bda1cb5ddde3b41741a82a2d15a841 + with: + category: /language:${{matrix.language}} diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml new file mode 100644 index 0000000..36bf834 --- /dev/null +++ b/.github/workflows/coverage.yml @@ -0,0 +1,38 @@ +name: Post coverage comment + +on: + workflow_run: + workflows: ["Python checks"] + types: + - completed + +permissions: + contents: read + +jobs: + test: + name: Run tests & display coverage + + runs-on: ubuntu-latest + if: github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion == 'success' + + permissions: + # Gives the action the necessary permissions for publishing new + # comments in pull requests. + pull-requests: write + # Gives the action the necessary permissions for editing existing + # comments (to avoid publishing multiple comments in the same PR) + contents: write + # Gives the action the necessary permissions for looking up the + # workflow that launched this workflow, and download the related + # artifact that contains the comment to be published + actions: read + + steps: + # DO NOT run actions/checkout here, for security reasons + # For details, refer to https://securitylab.github.com/research/github-actions-preventing-pwn-requests/ + - name: Post comment + uses: py-cov-action/python-coverage-comment-action@b2eb38dd175bf053189b35f738f9207278b00925 # v3.29 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_RUN_ID: ${{ github.event.workflow_run.id }} diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml new file mode 100644 index 0000000..a2f025d --- /dev/null +++ b/.github/workflows/dependency-review.yml @@ -0,0 +1,33 @@ +name: Dependency review + +on: + push: + branches: + - main + - master + pull_request: + schedule: + - cron: 0 0 * * 1 + workflow_dispatch: + +permissions: read-all + +jobs: + dependency-review: + name: Dependency review + + runs-on: ubuntu-latest + + steps: + - uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf + with: + disable-sudo: true + egress-policy: block + allowed-endpoints: > + api.github.com:443 + api.securityscorecards.dev:443 + github.com:443 + + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + + - uses: actions/dependency-review-action@ce3cf9537a52e8119d91fd484ab5b8a807627bf8 diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml new file mode 100644 index 0000000..fb04462 --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,315 @@ +name: Python checks + +on: + push: + branches: + - main + - master + pull_request: + schedule: + - cron: 0 0 * * 1 + workflow_dispatch: + +permissions: read-all + +jobs: + test: + name: Pytest testing + + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: + - '3.9' + - '3.10' + - '3.11' + - '3.12' + - '3.13' + os: + - ubuntu-latest + - windows-latest + - macos-latest + + permissions: + contents: write + + steps: + - uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf + with: + disable-sudo: false + egress-policy: block + allowed-endpoints: > + files.pythonhosted.org:443 + github.com:443 + pypi.org:443 + + # accounts.google.com:443 + # clients2.google.com:80 + # optimizationguide-pa.googleapis.com:443 + + detectportal.firefox.com:80 + content-signature-2.cdn.mozilla.net:443 + firefox-settings-attachments.cdn.mozilla.net:443 + firefox.settings.services.mozilla.com:443 + + raw.githubusercontent.com:443 + googlechromelabs.github.io:443 + + packages.microsoft.com:443 + + azure.archive.ubuntu.com:80 + esm.ubuntu.com:443 + + # plausible.io:443 + + r10.o.lencr.org:80 + r11.o.lencr.org:80 + + nel.heroku.com:443 + the-internet.herokuapp.com:443 + the-internet.herokuapp.com:80 + + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + + - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 + with: + python-version: ${{ matrix.python-version }} + cache: pip + + - uses: install-pinned/uv@95e90cc576af729d90be7d0233d3452899eef976 + + - run: uv pip install --system -e .[dev] + + - id: cache-pytest + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 + with: + path: .pytest_cache + key: ${{ runner.os }}-pytest-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }} + + - name: Run pytest (with headless support) + uses: GabrielBB/xvfb-action@5bcda06da84ba084708898801da79736b88e00a9 + env: + COVERAGE_FILE: .coverage.${{ runner.os }}.${{ matrix.python-version }} + with: + run: pytest + + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 + with: + name: coverage-${{ runner.os }}${{ matrix.python-version }} + path: .coverage.${{ runner.os }}.${{ matrix.python-version }} + include-hidden-files: true + + ruff-format: + name: Ruff formatting + + runs-on: ubuntu-latest + + permissions: + pull-requests: write + + steps: + - uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf + with: + disable-sudo: true + egress-policy: block + allowed-endpoints: > + files.pythonhosted.org:443 + github.com:443 + pypi.org:443 + + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + + - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 + with: + python-version: '3.13' + cache: pip + + - uses: install-pinned/uv@95e90cc576af729d90be7d0233d3452899eef976 + + - run: uv pip install --system -e .[dev] + + - id: cache-ruff + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 + with: + path: .ruff_cache + key: ${{ runner.os }}-ruff-3.13-${{ hashFiles('pyproject.toml') }} + + - id: run-ruff + run: ruff format --diff . + + ruff-check: + name: Ruff linting + + runs-on: ubuntu-latest + + permissions: + pull-requests: write + security-events: write + + steps: + - uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf + with: + disable-sudo: true + egress-policy: block + allowed-endpoints: > + api.github.com:443 + files.pythonhosted.org:443 + github.com:443 + pypi.org:443 + + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + + - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 + with: + python-version: '3.13' + cache: pip + + - uses: install-pinned/uv@95e90cc576af729d90be7d0233d3452899eef976 + + - run: uv pip install --system -e .[dev] + + - id: cache-ruff + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 + with: + path: .ruff_cache + key: ${{ runner.os }}-ruff-3.13-${{ hashFiles('pyproject.toml') }} + + - id: run-ruff-sarif + run: | + ruff check --output-format=sarif -o results.sarif . + + - uses: github/codeql-action/upload-sarif@1b549b9259bda1cb5ddde3b41741a82a2d15a841 + if: ( success() || failure() ) && contains('["success", "failure"]', steps.run-ruff-sarif.outcome) + with: + sarif_file: results.sarif + + - id: run-ruff + if: failure() && contains('["failure"]', steps.run-ruff-sarif.outcome) + run: | + ruff check --output-format=github . + + bandit: + name: Bandit security + + runs-on: ubuntu-latest + + permissions: + security-events: write + + steps: + - uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf + with: + disable-sudo: true + egress-policy: block + allowed-endpoints: > + api.github.com:443 + files.pythonhosted.org:443 + github.com:443 + pypi.org:443 + + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + + - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 + with: + python-version: '3.13' + cache: pip + + - uses: install-pinned/uv@95e90cc576af729d90be7d0233d3452899eef976 + + - run: uv pip install --system -e .[dev] + + - id: run-bandit-sarif + run: > + bandit --confidence-level 'medium' --format 'sarif' --output 'results.sarif' --recursive 'requestium' + + - uses: github/codeql-action/upload-sarif@1b549b9259bda1cb5ddde3b41741a82a2d15a841 + if: ( success() || failure() ) && contains('["success", "failure"]', steps.run-bandit-sarif.outcome) + with: + sarif_file: results.sarif + + - id: run-bandit + if: failure() && contains('["failure"]', steps.run-bandit-sarif.outcome) + run: > + bandit --confidence-level 'medium' --recursive 'requestium' + + coverage: + runs-on: ubuntu-latest + needs: test + permissions: + pull-requests: write + contents: write + steps: + - uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf + with: + disable-sudo: true + egress-policy: block + allowed-endpoints: > + api.github.com:443 + github.com:443 + img.shields.io:443 + + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + + - uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e + with: + pattern: coverage-* + merge-multiple: true + + - name: Coverage comment + id: coverage_comment + uses: py-cov-action/python-coverage-comment-action@d1ff8fbb5ff80feedb3faa0f6d7b424f417ad0e1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + MERGE_COVERAGE_FILES: true + + - name: Store Pull Request comment to be posted + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 + if: steps.coverage_comment.outputs.COMMENT_FILE_WRITTEN == 'true' + with: + name: python-coverage-comment-action + path: python-coverage-comment-action.txt + + pre-commit: + runs-on: ubuntu-latest + needs: + - ruff-format + - ruff-check + - bandit + permissions: + contents: write + + steps: + - uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf + with: + disable-sudo: true + egress-policy: block + allowed-endpoints: > + files.pythonhosted.org:443 + github.com:443 + proxy.golang.org:443 + pypi.org:443 + + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + + - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 + with: + python-version: '3.13' + cache: pip + + - uses: install-pinned/uv@95e90cc576af729d90be7d0233d3452899eef976 + + - run: uv pip install --system -e .[dev] + + - id: cache-pre-commit + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 + with: + path: .pre-commit-cache + key: ${{ runner.os }}-pre-commit-3.13 + + - name: Run pre-commit on all files + run: | + pre-commit install + pre-commit run --all-files + env: + PRE_COMMIT_HOME: .pre-commit-cache diff --git a/.gitignore b/.gitignore index c63caa8..e4d42f1 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,6 @@ __pycache__/ # Distribution / packaging .Python -env/ build/ develop-eggs/ dist/ @@ -21,9 +20,11 @@ parts/ sdist/ var/ wheels/ +share/python-wheels/ *.egg-info/ .installed.cfg *.egg +MANIFEST # PyInstaller # Usually these files are written by a python script from a template @@ -38,13 +39,18 @@ pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ +.nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover +*.py,cover .hypothesis/ +.pytest_cache/ +cover/ +results.sarif # Translations *.mo @@ -53,6 +59,8 @@ coverage.xml # Django stuff: *.log local_settings.py +db.sqlite3 +db.sqlite3-journal # Flask stuff: instance/ @@ -65,30 +73,61 @@ instance/ docs/_build/ # PyBuilder +.pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints -# pyenv -.python-version +# IPython +profile_default/ +ipython_config.py -# celery beat schedule file +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff celerybeat-schedule +celerybeat.pid # SageMath parsed files *.sage.py -# dotenv +# Environments .env - -# ctags -tags - -# virtualenv .venv +env/ venv/ ENV/ +env.bak/ +venv.bak/ # Spyder project settings .spyderproject @@ -102,6 +141,21 @@ ENV/ # mypy .mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ -# webdrivers -chromedriver +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..eba48cc --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,31 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: 'v5.0.0' + hooks: + - id: check-yaml + - id: check-ast + - id: check-builtin-literals + - id: check-case-conflict + - id: check-docstring-first + - id: check-executables-have-shebangs + - id: check-toml + - id: debug-statements + - id: mixed-line-ending + - repo: https://github.com/asottile/pyupgrade + rev: 'v3.19.1' + hooks: + - id: pyupgrade + args: ['--py39-plus'] + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: 'v0.11.4' + hooks: + - id: ruff + - id: ruff-format + - repo: https://github.com/PyCQA/bandit + rev: '1.8.3' + hooks: + - id: bandit + args: ['--confidence-level', 'medium'] + files: '^requestium' \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index e1a0970..0000000 --- a/.travis.yml +++ /dev/null @@ -1,38 +0,0 @@ -sudo: false -language: python -python: - - '2.7' - - '3.4' - - '3.5' - - '3.6' -install: - - pip install tox-travis - - wget -N https://chromedriver.storage.googleapis.com/99.0.4844.51/chromedriver_linux64.zip -P ~/ - - unzip ~/chromedriver_linux64.zip -d ~/ - - rm ~/chromedriver_linux64.zip - - sudo mv -f ~/chromedriver /usr/local/share/ - - sudo chmod +x /usr/local/share/chromedriver - - sudo ln -s /usr/local/share/chromedriver /usr/local/bin/chromedriver -script: tox -deploy: - # Test PyPI in every change to master - - provider: pypi - server: https://test.pypi.org/legacy/ - distributions: sdist bdist_wheel --universal - user: $PYPI_TEST_USERNAME - password: $PYPI_TEST_PASSWORD - on: - all_branches: true - tags: false - python: 3.6 - # Real PyPI in tags (ie. GitHub releases) - - provider: pypi - distributions: sdist bdist_wheel --universal - user: $PYPI_USERNAME - password: $PYPI_PASSWORD - on: - branch: master - tags: true - python: 3.6 -addons: - chrome: stable diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 614b0e7..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,5 +0,0 @@ -include *.md tox.ini LICENSE *.yml *.py -graft requestium - -global-exclude __pycache__ -global-exclude *.py[co] diff --git a/README.md b/README.md index 557a3ae..487005e 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,11 @@ ![Requestium](https://user-images.githubusercontent.com/14966348/32966130-8bb15b00-cbb7-11e7-9faf-85963ec5bd82.png) ======== -[![Build Status](https://travis-ci.org/tryolabs/requestium.svg?branch=master)](https://travis-ci.org/tryolabs/requestium) [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/requestium)](https://pypi.org/project/requestium/) +[![PyPI](https://img.shields.io/pypi/v/requestium?color=blue)](https://pypi.org/project/requestium/) +[![Coverage](https://raw.githubusercontent.com/tryolabs/requestium/python-coverage-comment-action-data/badge.svg)](https://htmlpreview.github.io/?https://github.com/tryolabs/requestium/blob/python-coverage-comment-action-data/htmlcov/index.html) +[![Python checks](https://github.com/tryolabs/requestium/actions/workflows/python.yml/badge.svg)](https://github.com/tryolabs/requestium/actions/workflows/python.yml) Requestium is a Python library that merges the power of [Requests](https://github.com/requests/requests), [Selenium](https://github.com/SeleniumHQ/selenium), and [Parsel](https://github.com/scrapy/parsel) into a single integrated tool for automatizing web actions. diff --git a/pyproject.toml b/pyproject.toml index 0237684..c51f76f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,20 +1,86 @@ -[tool.poetry] +[project] name = "requestium" -version = "0.4.0" -description = "" -authors = ["Joaquin Alori "] +version = "0.5.0" readme = "README.md" +requires-python = ">=3.9" +license = { file = "LICENSE" } +authors = [ + { name = "Joaquin Alori", email = "joaquin@tryolabs.com" } +] +maintainers = [ + { name = "Judson Neer", email = "jkudson.neer@gmail.com" }, + { name = "Wil T", email = "wil.t.me@pm.me" }, +] +dependencies = [ + "parsel>=1.0", + "requests>=2.0", + "selenium>=4.0", + "tldextract>=5.0", +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Natural Language :: English", + "Operating System :: MacOS", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX", + "Operating System :: OS Independent", + "Environment :: Web Environment", + "Programming Language :: Python :: Implementation :: CPython", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Software Development :: Testing", +] -[tool.poetry.dependencies] -python = ">=3.9,<3.14" -parsel = "^1.8.1" -requests = "^2.31.0" -selenium = "^4.15.2" -tldextract = "^5.1.1" +[project.urls] +source = "https://github.com/tryolabs/requestium" +download = "https://pypi.org/project/requestium/#files" +issues = "https://github.com/tryolabs/requestium/issues" -[tool.poetry.group.dev.dependencies] -pytest = "^7.4.3" +[project.optional-dependencies] +dev = [ + "bandit[sarif]==1.8.3", + "coverage==7.8.0", + "pre-commit==4.2.0", + "pytest-cov==6.0.0", + "pytest-xdist==3.6.1", + "pytest==8.3.5", + "ruff==0.11.4", +] -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" +[tool.ruff] +line-length = 160 +target-version = "py39" +include = [ + "requestium/**/*.py", + "tests/**/*.py", +] + +[tool.pytest.ini_options] +addopts = "--cov=requestium -n auto" +testpaths = [ + "tests/", +] + +[tool.coverage.run] +branch = true +relative_files = true +command_line = "-m pytest" + +[tool.coverage.paths] +source = [ + "requestium/", +] +omit = [ + "tests/", +] + +[tool.coverage.report] +exclude_also = [ + "logger.", +] diff --git a/requestium/requestium.py b/requestium/requestium.py index 6941f51..2d43160 100644 --- a/requestium/requestium.py +++ b/requestium/requestium.py @@ -27,9 +27,12 @@ class Session(requests.Session): Some useful helper methods and object wrappings have been added. """ - def __init__(self, webdriver_path=None, headless=None, default_timeout=5, - webdriver_options={}, driver=None, **kwargs): - super(Session, self).__init__() + def __init__(self, webdriver_path=None, headless=None, default_timeout=5, webdriver_options=None, driver=None, **kwargs): + super().__init__() + + if webdriver_options is None: + webdriver_options = {} + self.webdriver_path = webdriver_path self.default_timeout = default_timeout self.webdriver_options = webdriver_options @@ -40,13 +43,13 @@ def __init__(self, webdriver_path=None, headless=None, default_timeout=5, self._driver_initializer = functools.partial(self._start_chrome_browser, headless=headless) else: for name in DriverMixin.__dict__: - name_private = name.startswith('__') and name.endswith('__') + name_private = name.startswith("__") and name.endswith("__") name_function = isinstance(DriverMixin.__dict__[name], types.FunctionType) name_in_driver = name in dir(self._driver) if name_private or not name_function or name_in_driver: continue self._driver.__dict__[name] = DriverMixin.__dict__[name].__get__(self._driver) - setattr(self._driver, 'default_timeout', self.default_timeout) + self._driver.default_timeout = self.default_timeout @property def driver(self): @@ -54,42 +57,41 @@ def driver(self): self._driver = self._driver_initializer() return self._driver - def _start_chrome_browser(self, headless=False): + def _start_chrome_browser(self, headless=False): # noqa C901 # TODO transfer of proxies and headers: Not supported by chromedriver atm. # Choosing not to use plug-ins for this as I don't want to worry about the # extra dependencies and plug-ins don't work in headless mode. :-( chrome_options = webdriver.chrome.options.Options() if headless: - chrome_options.add_argument('headless=new') + chrome_options.add_argument("headless=new") - if 'binary_location' in self.webdriver_options: - chrome_options.binary_location = self.webdriver_options['binary_location'] + if "binary_location" in self.webdriver_options: + chrome_options.binary_location = self.webdriver_options["binary_location"] - if 'arguments' in self.webdriver_options: - if isinstance(self.webdriver_options['arguments'], list): - for arg in self.webdriver_options['arguments']: + if "arguments" in self.webdriver_options: + if isinstance(self.webdriver_options["arguments"], list): + for arg in self.webdriver_options["arguments"]: chrome_options.add_argument(arg) else: - raise Exception('A list is needed to use \'arguments\' option. Found {}'.format( - type(self.webdriver_options['arguments']))) + raise Exception("A list is needed to use 'arguments' option. Found {}".format(type(self.webdriver_options["arguments"]))) - if 'extensions' in self.webdriver_options: - if isinstance(self.webdriver_options['extensions'], list): - for arg in self.webdriver_options['extensions']: + if "extensions" in self.webdriver_options: + if isinstance(self.webdriver_options["extensions"], list): + for arg in self.webdriver_options["extensions"]: chrome_options.add_extension(arg) - if 'prefs' in self.webdriver_options: - prefs = self.webdriver_options['prefs'] - chrome_options.add_experimental_option('prefs', prefs) + if "prefs" in self.webdriver_options: + prefs = self.webdriver_options["prefs"] + chrome_options.add_experimental_option("prefs", prefs) - experimental_options = self.webdriver_options.get('experimental_options') + experimental_options = self.webdriver_options.get("experimental_options") if isinstance(experimental_options, dict): for name, value in experimental_options.items(): chrome_options.add_experimental_option(name, value) # Create driver process - RequestiumChrome = type('RequestiumChrome', (DriverMixin, webdriver.Chrome), {}) + RequestiumChrome = type("RequestiumChrome", (DriverMixin, webdriver.Chrome), {}) # Selenium updated webdriver.Chrome's arg and kwargs, to accept options, service, keep_alive # since ChromeService is the only object where webdriver_path is mapped to executable_path, it must be # initialized and passed in as a kwarg to RequestiumChrome so it can be passed in as a kwarg @@ -107,13 +109,11 @@ def transfer_session_cookies_to_driver(self, domain=None): if not domain and self._last_requests_url: domain = tldextract.extract(self._last_requests_url).registered_domain elif not domain and not self._last_requests_url: - raise Exception('Trying to transfer cookies to selenium without specifying a domain ' - 'and without having visited any page in the current session') + raise Exception("Trying to transfer cookies to selenium without specifying a domain and without having visited any page in the current session") # Transfer cookies for c in [c for c in self.cookies if domain in c.domain]: - cookie = {'name': c.name, 'value': c.value, 'path': c.path, - 'expiry': c.expires, 'domain': c.domain} + cookie = {"name": c.name, "value": c.value, "path": c.path, "expiry": c.expires, "domain": c.domain} self.driver.ensure_add_cookie({k: v for k, v in cookie.items() if v is not None}) @@ -122,25 +122,25 @@ def transfer_driver_cookies_to_session(self, copy_user_agent=True): self.copy_user_agent_from_driver() for cookie in self.driver.get_cookies(): - self.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain']) + self.cookies.set(cookie["name"], cookie["value"], domain=cookie["domain"]) def get(self, *args, **kwargs): - resp = super(Session, self).get(*args, **kwargs) + resp = super().get(*args, **kwargs) self._last_requests_url = resp.url return RequestiumResponse(resp) def post(self, *args, **kwargs): - resp = super(Session, self).post(*args, **kwargs) + resp = super().post(*args, **kwargs) self._last_requests_url = resp.url return RequestiumResponse(resp) def put(self, *args, **kwargs): - resp = super(Session, self).put(*args, **kwargs) + resp = super().put(*args, **kwargs) self._last_requests_url = resp.url return RequestiumResponse(resp) def copy_user_agent_from_driver(self): - """ Updates requests' session user-agent with the driver's user agent + """Updates requests' session user-agent with the driver's user agent This method will start the browser process if its not already running. """ @@ -148,13 +148,11 @@ def copy_user_agent_from_driver(self): self.headers.update({"user-agent": selenium_user_agent}) -class RequestiumResponse(object): +class RequestiumResponse: """Adds xpath, css, and regex methods to a normal requests response object""" def __init__(self, response): - self.__class__ = type(response.__class__.__name__, - (self.__class__, response.__class__), - response.__dict__) + self.__class__ = type(response.__class__.__name__, (self.__class__, response.__class__), response.__dict__) @property def selector(self): @@ -177,13 +175,12 @@ def re_first(self, *args, **kwargs): return self.selector.re_first(*args, **kwargs) -class DriverMixin(object): - """Provides helper methods to our driver classes - """ +class DriverMixin: + """Provides helper methods to our driver classes""" def __init__(self, *args, **kwargs): - self.default_timeout = kwargs.pop('default_timeout', None) - super(DriverMixin, self).__init__(*args, **kwargs) + self.default_timeout = kwargs.pop("default_timeout", None) + super().__init__(*args, **kwargs) def try_add_cookie(self, cookie): """Attempt to add the cookie. Suppress any errors, and simply @@ -191,7 +188,7 @@ def try_add_cookie(self, cookie): """ try: self.add_cookie(cookie) - except Exception: + except WebDriverException: pass return self.is_cookie_in_driver(cookie) @@ -220,28 +217,28 @@ def ensure_add_cookie(self, cookie, override_domain=None): was to not do anything, which was very hard to debug. """ if override_domain: - cookie['domain'] = override_domain + cookie["domain"] = override_domain - cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:] + cookie_domain = cookie["domain"] if cookie["domain"][0] != "." else cookie["domain"][1:] try: browser_domain = tldextract.extract(self.current_url).fqdn except AttributeError: - browser_domain = '' + browser_domain = "" if cookie_domain not in browser_domain: # TODO Check if hardcoding 'http' causes trouble # TODO Consider using a new proxy for this next request to not cause an anomalous # request. This way their server sees our ip address as continuously having the # same cookies and not have a request mid-session with no cookies - self.get('http://' + cookie_domain) + self.get("http://" + cookie_domain) cookie_added = self.try_add_cookie(cookie) # If we fail adding the cookie, retry with a more permissive domain if not cookie_added: - cookie['domain'] = tldextract.extract(cookie['domain']).registered_domain + cookie["domain"] = tldextract.extract(cookie["domain"]).registered_domain cookie_added = self.try_add_cookie(cookie) if not cookie_added: - raise WebDriverException("Couldn't add the following cookie to the webdriver: {}".format(cookie)) + raise WebDriverException(f"Couldn't add the following cookie to the webdriver: {cookie}") def is_cookie_in_driver(self, cookie): """We check that the cookie is correctly added to the driver @@ -250,9 +247,9 @@ def is_cookie_in_driver(self, cookie): We are a bit lenient when comparing domains. """ for driver_cookie in self.get_cookies(): - name_matches = cookie['name'] == driver_cookie['name'] - value_matches = cookie['value'] == driver_cookie['value'] - domain_matches = driver_cookie['domain'] in (cookie['domain'], '.' + cookie['domain']) + name_matches = cookie["name"] == driver_cookie["name"] + value_matches = cookie["value"] == driver_cookie["value"] + domain_matches = driver_cookie["domain"] in (cookie["domain"], "." + cookie["domain"]) if name_matches and value_matches and domain_matches: return True return False @@ -303,11 +300,11 @@ def ensure_element(self, locator: str, selector: str, state: str = "present", ti More info at: http://selenium-python.readthedocs.io/waits.html """ locators_compatibility = { - 'link_text': By.LINK_TEXT, - 'partial_link_text': By.PARTIAL_LINK_TEXT, - 'tag_name': By.TAG_NAME, - 'class_name': By.CLASS_NAME, - 'css_selector': By.CSS_SELECTOR + "link_text": By.LINK_TEXT, + "partial_link_text": By.PARTIAL_LINK_TEXT, + "tag_name": By.TAG_NAME, + "class_name": By.CLASS_NAME, + "css_selector": By.CSS_SELECTOR, } if locator in locators_compatibility: warnings.warn( @@ -315,35 +312,25 @@ def ensure_element(self, locator: str, selector: str, state: str = "present", ti Support for locator strategy names with underscores is deprecated. Use strategies from Selenium's By class (importable from selenium.webdriver.common.by). """, - DeprecationWarning + DeprecationWarning, + stacklevel=2, ) locator = locators_compatibility[locator] if not timeout: timeout = self.default_timeout - if state == 'visible': - element = WebDriverWait(self, timeout).until( - expected_conditions.visibility_of_element_located((locator, selector)) - ) - elif state == 'clickable': - element = WebDriverWait(self, timeout).until( - expected_conditions.element_to_be_clickable((locator, selector)) - ) - elif state == 'present': - element = WebDriverWait(self, timeout).until( - expected_conditions.presence_of_element_located((locator, selector)) - ) - elif state == 'invisible': - WebDriverWait(self, timeout).until( - expected_conditions.invisibility_of_element_located((locator, selector)) - ) + if state == "visible": + element = WebDriverWait(self, timeout).until(expected_conditions.visibility_of_element_located((locator, selector))) + elif state == "clickable": + element = WebDriverWait(self, timeout).until(expected_conditions.element_to_be_clickable((locator, selector))) + elif state == "present": + element = WebDriverWait(self, timeout).until(expected_conditions.presence_of_element_located((locator, selector))) + elif state == "invisible": + WebDriverWait(self, timeout).until(expected_conditions.invisibility_of_element_located((locator, selector))) element = None else: - raise ValueError( - "The 'state' argument must be 'visible', 'clickable', 'present' " - "or 'invisible', not '{}'".format(state) - ) + raise ValueError(f"The 'state' argument must be 'visible', 'clickable', 'present' or 'invisible', not '{state}'") # We add this method to our element to provide a more robust click. Chromedriver # sometimes needs some time before it can click an item, specially if it needs to @@ -392,10 +379,12 @@ def _ensure_click(self): # - It is outside of the viewport # - It is under a banner or toolbar # This script solves both cases - script = ("var viewPortHeight = Math.max(" - "document.documentElement.clientHeight, window.innerHeight || 0);" - "var elementTop = arguments[0].getBoundingClientRect().top;" - "window.scrollBy(0, elementTop-(viewPortHeight/2));") + script = ( + "var viewPortHeight = Math.max(" + "document.documentElement.clientHeight, window.innerHeight || 0);" + "var elementTop = arguments[0].getBoundingClientRect().top;" + "window.scrollBy(0, elementTop-(viewPortHeight/2));" + ) self.parent.execute_script(script, self) # parent = the webdriver for _ in range(10): @@ -405,8 +394,4 @@ def _ensure_click(self): except WebDriverException as e: exception_message = str(e) time.sleep(0.2) - raise WebDriverException( - "Couldn't click item after trying 10 times, got error message: \n{}".format( - exception_message - ) - ) + raise WebDriverException(f"Couldn't click item after trying 10 times, got error message: \n{exception_message}") diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 8ff3530..0000000 --- a/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -parsel>=1.8.1 -requests>=2.31.0 -selenium>=4.15.2 -tldextract>=5.1.1 diff --git a/setup.py b/setup.py deleted file mode 100644 index 107cf77..0000000 --- a/setup.py +++ /dev/null @@ -1,38 +0,0 @@ -# Always prefer setuptools over distutils -from setuptools import setup - -# Get the long description from the README file -with open('README.md') as file: - long_description = file.read() - -setup( - name='requestium', - version='0.3.0', - description=( - "Adds a Selenium webdriver and parsel's parser to a request's Session " - "object, and makes switching between them seamless. Handles cookie, " - "proxy and header transfer." - ), - long_description=long_description, - long_description_content_type='text/markdown', - author='Joaquin Alori', - author_email='joaquin@tryolabs.com', - url='https://github.com/tryolabs/requestium', - packages=('requestium',), - install_requires=( - 'parsel>=1.7.0', - 'requests>=2.28.1', - 'selenium>=4.6.0', - 'tldextract>=3.4.0', - ), - license='MIT', - zip_safe=False, - classifiers=[ - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Developers', - 'Natural Language :: English', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 3', - ], -) diff --git a/tests/test_ensure_elements_deprecation.py b/tests/test_ensure_elements_deprecation.py index 96ce208..cb4854f 100644 --- a/tests/test_ensure_elements_deprecation.py +++ b/tests/test_ensure_elements_deprecation.py @@ -2,20 +2,19 @@ import pytest import selenium -from selenium.webdriver.common.by import By import requestium -chrome_webdriver_path = shutil.which('chromedriver') +chrome_webdriver_path = shutil.which("chromedriver") chrome_webdriver = selenium.webdriver.chrome.webdriver.WebDriver() firefox_webdriver = selenium.webdriver.firefox.webdriver.WebDriver() session_parameters = [ - {'webdriver_path': chrome_webdriver_path}, - {'webdriver_path': chrome_webdriver_path, 'headless': True}, - {'driver': chrome_webdriver}, - {'driver': firefox_webdriver}, + {"webdriver_path": chrome_webdriver_path}, + {"webdriver_path": chrome_webdriver_path, "headless": True}, + {"driver": chrome_webdriver}, + {"driver": firefox_webdriver}, ] @@ -27,6 +26,6 @@ def session(request): def test_deprecation_warning_for_ensure_element_locators_with_underscores(session): - session.driver.get('http://the-internet.herokuapp.com') + session.driver.get("http://the-internet.herokuapp.com") with pytest.warns(DeprecationWarning): - session.driver.ensure_element("class_name", 'no-js') + session.driver.ensure_element("class_name", "no-js") diff --git a/tests/test_requestium.py b/tests/test_requestium.py index a18caf3..bcc2c3d 100644 --- a/tests/test_requestium.py +++ b/tests/test_requestium.py @@ -6,16 +6,16 @@ import requestium -chrome_webdriver_path = shutil.which('chromedriver') +chrome_webdriver_path = shutil.which("chromedriver") chrome_webdriver = selenium.webdriver.chrome.webdriver.WebDriver() firefox_webdriver = selenium.webdriver.firefox.webdriver.WebDriver() session_parameters = [ - {'webdriver_path': chrome_webdriver_path}, - {'webdriver_path': chrome_webdriver_path, 'headless': True}, - {'driver': chrome_webdriver}, - {'driver': firefox_webdriver}, + {"webdriver_path": chrome_webdriver_path}, + {"webdriver_path": chrome_webdriver_path, "headless": True}, + {"driver": chrome_webdriver}, + {"driver": firefox_webdriver}, ] @@ -27,9 +27,9 @@ def session(request): def test_simple_page_load(session): - session.driver.get('http://the-internet.herokuapp.com') - session.driver.ensure_element(By.ID, 'content') + session.driver.get("http://the-internet.herokuapp.com") + session.driver.ensure_element(By.ID, "content") title = session.driver.title heading = session.driver.find_element(By.XPATH, '//*[@id="content"]/h1') - assert title == 'The Internet' - assert heading.text == 'Welcome to the-internet' + assert title == "The Internet" + assert heading.text == "Welcome to the-internet" diff --git a/tox.ini b/tox.ini deleted file mode 100644 index e926590..0000000 --- a/tox.ini +++ /dev/null @@ -1,14 +0,0 @@ -[tox] -envlist = py{27,34,35,36} - -[testenv] -passenv = TOXENV CI TRAVIS TRAVIS_* -deps = - check-manifest - flake8 - pytest -commands = - pip install -e . - check-manifest - flake8 luminoth - pytest