Skip to content

Commit a690bdd

Browse files
committed
wp-details detector
1 parent 01bbad6 commit a690bdd

File tree

6 files changed

+403
-0
lines changed

6 files changed

+403
-0
lines changed

.github/workflows/pipy-publish.yml

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# This workflow will upload a Python Package using Twine when a release is created
2+
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3+
4+
name: Upload Python Package

on:
  # Uncomment to also publish on every push to main.
  # push:
  #   branches: [main]

  release:
    types: [published]

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
    # v2 of these actions targets the deprecated Node.js 12 runtime.
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.x'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install setuptools wheel twine
    - name: Build and publish
      env:
        # Credentials are read from the repository secrets.
        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
      run: |
        python setup.py sdist bdist_wheel
        twine upload dist/*

.gitignore

+129
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
pip-wheel-metadata/
24+
share/python-wheels/
25+
*.egg-info/
26+
.installed.cfg
27+
*.egg
28+
MANIFEST
29+
30+
# PyInstaller
31+
# Usually these files are written by a python script from a template
32+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
33+
*.manifest
34+
*.spec
35+
36+
# Installer logs
37+
pip-log.txt
38+
pip-delete-this-directory.txt
39+
40+
# Unit test / coverage reports
41+
htmlcov/
42+
.tox/
43+
.nox/
44+
.coverage
45+
.coverage.*
46+
.cache
47+
nosetests.xml
48+
coverage.xml
49+
*.cover
50+
*.py,cover
51+
.hypothesis/
52+
.pytest_cache/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
target/
76+
77+
# Jupyter Notebook
78+
.ipynb_checkpoints
79+
80+
# IPython
81+
profile_default/
82+
ipython_config.py
83+
84+
# pyenv
85+
.python-version
86+
87+
# pipenv
88+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
90+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
91+
# install all needed dependencies.
92+
#Pipfile.lock
93+
94+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
95+
__pypackages__/
96+
97+
# Celery stuff
98+
celerybeat-schedule
99+
celerybeat.pid
100+
101+
# SageMath parsed files
102+
*.sage.py
103+
104+
# Environments
105+
.env
106+
.venv
107+
env/
108+
venv/
109+
ENV/
110+
env.bak/
111+
venv.bak/
112+
113+
# Spyder project settings
114+
.spyderproject
115+
.spyproject
116+
117+
# Rope project settings
118+
.ropeproject
119+
120+
# mkdocs documentation
121+
/site
122+
123+
# mypy
124+
.mypy_cache/
125+
.dmypy.json
126+
dmypy.json
127+
128+
# Pyre type checker
129+
.pyre/

README.md

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# cms-detector
2+
3+
## What's in cms-detector?
4+
5+
A Python Package to detect the Content Management System of a Website.
6+
7+
We believe in monolithic software development and created this tiny package that does its job without any bloat.
8+
9+
## How to Use cms-detector?
10+
11+
You can find a detailed tutorial on the [cms-detector tutorial website](https://serpwings.com/software/python-cms-detector/).
12+
13+
## Contribute
14+
15+
Pull Requests, Feature Suggestions, and collaborations are welcome.
16+
17+
## About Us
18+
19+
This work is a collaborative effort of [seowings](https://seowings.org/) and [serpwings](https://serpwings.com/).

cms_detector/__init__.py

+141
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*- #
3+
4+
"""
5+
cms-detector: A Python Package to detect the Content Management System of a Website.
6+
7+
MIT License
8+
Copyright (c) 2023 SERP Wings www.serpwings.com
9+
Permission is hereby granted, free of charge, to any person obtaining a copy
10+
of this software and associated documentation files (the "Software"), to deal
11+
in the Software without restriction, including without limitation the rights
12+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13+
copies of the Software, and to permit persons to whom the Software is
14+
furnished to do so, subject to the following conditions:
15+
The above copyright notice and this permission notice shall be included in all
16+
copies or substantial portions of the Software.
17+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
SOFTWARE.
24+
"""
25+
26+
27+
# +++++++++++++++++++++++++++++++++++++++++++++++++++++
28+
# IMPORTS Standard Library
29+
# +++++++++++++++++++++++++++++++++++++++++++++++++++++
30+
31+
import re
32+
from unittest.mock import Mock
33+
34+
# +++++++++++++++++++++++++++++++++++++++++++++++++++++
35+
# IMPORTS 3rd Party Libraries
36+
# +++++++++++++++++++++++++++++++++++++++++++++++++++++
37+
38+
import requests
39+
from requests.adapters import HTTPAdapter
40+
from requests.models import Response
41+
from bs4 import BeautifulSoup
42+
43+
# +++++++++++++++++++++++++++++++++++++++++++++++++++++
44+
# DATABASE/CONSTANTS LIST
45+
# +++++++++++++++++++++++++++++++++++++++++++++++++++++
46+
47+
HEADER = {
48+
"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
49+
}
50+
51+
# +++++++++++++++++++++++++++++++++++++++++++++++++++++
52+
# Utility Functions
53+
# +++++++++++++++++++++++++++++++++++++++++++++++++++++
54+
55+
56+
def mock_requests_object(url):
    """Build a stand-in response object for a request that could not be made.

    The returned ``Mock`` is spec'd on ``Response`` and carries an empty
    ``text``, the sentinel ``status_code`` 9999 and the requested ``url``.
    """
    placeholder = Mock(spec=Response)
    placeholder.configure_mock(text="", status_code=9999, url=url)
    return placeholder
63+
64+
65+
def get_remote_content(url, max_retires=2, timeout=10):
    """Get the remote content available at a given url.

    Args:
        url: Absolute URL to request.
        max_retires: Retry count handed to the ``HTTPAdapter``. The misspelt
            parameter name is kept so existing keyword callers keep working.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``requests`` response on success. If ``requests`` raises while
        performing the call, the object built by ``mock_requests_object``
        (status code 9999, empty text) is returned instead.
    """
    try:
        # The context manager closes the session's connection pool on exit;
        # the body has already been read by then because streaming is off.
        with requests.Session() as session:
            adapter = HTTPAdapter(max_retries=max_retires)
            # Mount on the scheme prefixes (not the exact URL) so that
            # redirect targets are retried as well.
            session.mount("http://", adapter)
            session.mount("https://", adapter)
            return session.get(url, headers=HEADER, timeout=timeout)
    except requests.exceptions.RequestException:
        # Best-effort fetch: report failure through the sentinel response
        # without swallowing KeyboardInterrupt/SystemExit.
        return mock_requests_object(url)
73+
74+
75+
def get_corrected_url(url, fix_slash="/"):
    """Correct the scheme and the trailing slash of a url.

    Args:
        url: URL or bare host name.
        fix_slash: Suffix the result must end with; pass ``""`` to leave the
            end of the url untouched.

    Returns:
        ``url`` prefixed with ``http://`` when it has no http(s) scheme, and
        with ``fix_slash`` appended when it does not already end with it.
    """
    # URL schemes are case-insensitive, so "HTTPS://host" already has one.
    if not url.lower().startswith(("http://", "https://")):
        url = f"http://{url}"

    if not url.endswith(fix_slash):
        url = f"{url}{fix_slash}"

    return url
84+
85+
86+
def _asset_names(links, folder):
    """Return the sorted, unique directory names found below ``/<folder>/`` in ``links``."""
    pattern = re.compile(f"/{folder}/(.*)/")
    matches = (pattern.search(link) for link in links)
    # The capture is greedy, so keep only the first path segment after the folder.
    return sorted({match.group(1).split("/")[0] for match in matches if match})


def wp_details(target_url):
    """Check if WordPress is installed on a given website.

    The page at ``target_url`` is fetched and every http(s) link in its text
    is inspected for WordPress markers (``wp-content``, ``wp-includes``,
    ``wp-json``). Theme and plugin names are taken from ``/themes/<name>/``
    and ``/plugins/<name>/`` path segments of those links.

    Args:
        target_url: URL or bare host name of the website to inspect.

    Returns:
        A dict with the keys ``is_wp_installed`` (bool), ``wp_version`` (the
        content of the first ``generator`` meta tag, or ``""``), ``themes``
        and ``plugins`` (sorted lists of names). The same dict, holding its
        default values, is returned when the page cannot be fetched or
        answers with a status code of 400 or above.
    """
    details = {
        "is_wp_installed": False,
        "wp_version": "",
        "themes": [],
        "plugins": [],
    }

    target_url = get_corrected_url(target_url, fix_slash="/")
    response = get_remote_content(target_url)

    if response.status_code >= 400:
        return details

    # Raw string so the regex escapes are not parsed as (invalid) string
    # escapes. NOTE(review): "\+-=" inside the class is a character range
    # ('+' through '='), kept as-is to preserve which links are matched.
    link_regex = re.compile(
        r"((https?):((/)|(\\))+([\w\d:#@%/;$()~_?\+-=\\.&](#!)?)*)",
        re.DOTALL,
    )
    # findall yields one tuple per match; group 0 of it is the whole link.
    all_links = {groups[0] for groups in link_regex.findall(response.text)}

    details["themes"] = _asset_names(all_links, "themes")
    details["plugins"] = _asset_names(all_links, "plugins")

    wp_markers = ("wp-content", "wp-includes", "wp-json")
    if any(marker in link for link in all_links for marker in wp_markers):
        details["is_wp_installed"] = True
        soup = BeautifulSoup(response.content, "lxml")
        generator_tag = soup.find("meta", attrs={"name": "generator"})
        if generator_tag is not None:
            # A generator tag without a content attribute yields None.
            details["wp_version"] = generator_tag.get("content") or ""

    return details

examples/tutorial.py

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from cms_detector import wp_details
2+
3+
# Run the detector against wordpress.org and print the report it returns.
print(wp_details(target_url="https://wordpress.org"))

0 commit comments

Comments
 (0)