Skip to content

Commit a03d659

Browse files
improve #4, fix #5
- now all files are imported & their functions are called automatically, so there is no need to import your module! Just drop the file in the folder and watch the magic happen
- add an importance factor: after importing the files, the script executes the functions with the greatest importance first
1 parent d8bde82 commit a03d659

14 files changed

+195
-68
lines changed

django-check-seo/checks/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Standard Library
import glob
from os.path import basename, dirname, isfile, join

# Files of this package that are infrastructure rather than checks.
_EXCLUDED_FILES = ("__init__.py", "launch_checks.py")

# List the Python files sitting next to this __init__.py. The listing is
# sorted because glob returns entries in a filesystem-dependent order, and
# the order of __all__ decides the order in which "from . import *" imports
# the check modules.
modules = sorted(glob.glob(join(dirname(__file__), "*.py")))

# Add every check module (file name without its ".py" suffix) to __all__ so
# that it can be imported with "from . import *".
__all__ = [
    basename(module)[:-3]
    for module in modules
    if isfile(module) and not module.endswith(_EXCLUDED_FILES)
]

django-check-seo/checks/check_description.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,16 @@
55
from django.utils.translation import gettext as _
66

77

8-
def check_description(site):
8+
def importance():
9+
"""Scripts with higher importance will be executed in first.
10+
11+
Returns:
12+
int -- Importance of the script.
13+
"""
14+
return 1
15+
16+
17+
def run(site):
918
meta = site.soup.find_all("meta")
1019
for tag in meta:
1120
if (

django-check-seo/checks/check_h1.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,16 @@
55
from django.utils.translation import gettext as _
66

77

8-
def check_h1(site):
8+
def importance():
9+
"""Scripts with higher importance will be executed in first.
10+
11+
Returns:
12+
int -- Importance of the script.
13+
"""
14+
return 1
15+
16+
17+
def run(site):
918
"""Check all h1-related conditions
1019
"""
1120

django-check-seo/checks/check_h2.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,19 @@
55
from django.utils.translation import gettext as _
66

77

8-
def check_h2(self):
9-
h2 = self.soup.find_all("h2")
8+
def importance():
9+
"""Scripts with higher importance will be executed in first.
10+
11+
Returns:
12+
int -- Importance of the script.
13+
"""
14+
return 1
15+
16+
17+
def run(site):
18+
h2 = site.soup.find_all("h2")
1019
if not h2:
11-
self.warnings.append(
20+
site.warnings.append(
1221
{
1322
"name": _("No h2 tag"),
1423
"settings": _("at least 1"),
@@ -20,7 +29,7 @@ def check_h2(self):
2029
else:
2130
occurence = []
2231
# check if each keyword
23-
for keyword in self.keywords:
32+
for keyword in site.keywords:
2433
# is present at least
2534
for single_h2 in h2:
2635
occurence.append(
@@ -34,7 +43,7 @@ def check_h2(self):
3443
)
3544
# if no keyword is found in h2
3645
if not any(i > 0 for i in occurence):
37-
self.warnings.append(
46+
site.warnings.append(
3847
{
3948
"name": _("No keyword in h2"),
4049
"settings": _("at least 1"),

django-check-seo/checks/check_images.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,26 @@
11
# Third party
2+
import bs4
23
from django.utils.translation import gettext as _
34

45

5-
def check_images(self):
6-
images = self.content.find_all("img")
6+
def importance():
7+
"""Scripts with higher importance will be executed in first.
8+
9+
Returns:
10+
int -- Importance of the script.
11+
"""
12+
return 1
13+
14+
15+
def run(site):
16+
images = bs4.element.ResultSet(None)
17+
18+
for c in site.content:
19+
images += c.find_all("img")
720

821
for image in images:
922
if "alt" not in image.attrs or image.attrs["alt"] == "None":
10-
self.problems.append(
23+
site.problems.append(
1124
{
1225
"name": _("Img lack alt tag"),
1326
"settings": _("all images"),

django-check-seo/checks/check_keyword_url.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,16 @@
22
from django.utils.translation import gettext as _
33

44

5-
def check_keyword_url(site):
5+
def importance():
6+
"""Scripts with higher importance will be executed in first.
7+
8+
Returns:
9+
int -- Importance of the script.
10+
"""
11+
return 1
12+
13+
14+
def run(site):
615
"""Check presence of keywords in url
716
"""
817
for keyword in site.keywords:

django-check-seo/checks/check_keywords.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,18 @@
22
from django.utils.translation import gettext as _
33

44

5-
def check_keywords(site):
6-
"""Ensure that all keywords are present.
5+
def importance():
6+
"""Scripts with higher importance will be executed in first.
7+
8+
Returns:
9+
int -- Importance of the script.
10+
"""
11+
return 5
12+
13+
14+
def run(site):
15+
"""Ensure that meta tag exists and contain at least one keyword.
16+
Populate site.keywords list with keywords found.
717
"""
818
meta = site.soup.find_all("meta")
919
for tag in meta:

django-check-seo/checks/check_links.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,28 @@
22
import os
33

44
# Third party
5+
import bs4
56
from django.utils.translation import gettext as _
67

78

8-
def check_links(site):
9+
def importance():
10+
"""Scripts with higher importance will be executed in first.
11+
12+
Returns:
13+
int -- Importance of the script.
14+
"""
15+
return 1
16+
17+
18+
def run(site):
919
"""Check all link-related conditions
1020
"""
11-
links = site.content.find_all("a")
21+
22+
links = bs4.element.ResultSet(None)
23+
24+
for c in site.content:
25+
links = c.find_all("a")
26+
1227
internal_links = 0
1328
external_links = 0
1429

django-check-seo/checks/check_title.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,16 @@
22
from django.utils.translation import gettext as _
33

44

5-
def check_title(site):
5+
def importance():
6+
"""Scripts with higher importance will be executed in first.
7+
8+
Returns:
9+
int -- Importance of the script.
10+
"""
11+
return 1
12+
13+
14+
def run(site):
615
"""Check all title-related conditions.
716
"""
817
# title presence

django-check-seo/checks/check_url.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,16 @@
22
from django.utils.translation import gettext as _
33

44

5-
def check_url(site):
5+
def importance():
6+
"""Scripts with higher importance will be executed in first.
7+
8+
Returns:
9+
int -- Importance of the script.
10+
"""
11+
return 1
12+
13+
14+
def run(site):
615
"""All the url-related checks.
716
"""
817

django-check-seo/checks/content_words_number.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
1-
# Standard Library
2-
import re
3-
41
# Third party
52
from django.utils.translation import gettext as _
63

74

8-
def content_words_number(site):
9-
"""Count number of words in content.
5+
def importance():
6+
"""Scripts with higher importance will be executed in first.
7+
8+
Returns:
9+
int -- Importance of the script.
1010
"""
11+
return 1
1112

12-
content = re.findall(r"\w+", site.content.text.lower())
1313

14-
nb_words = len(content)
14+
def run(site):
15+
"""Count number of words in content.
16+
"""
17+
18+
nb_words = len(site.content_text)
1519

1620
# too few words
1721
if nb_words < site.settings.SEO_SETTINGS["content_words_number"][0]:

django-check-seo/checks/keyword_present_first_paragraph.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,24 @@
22
from django.utils.translation import gettext as _
33

44

5-
def keyword_present_first_paragraph(site):
5+
def importance():
6+
"""Scripts with higher importance will be executed in first.
7+
8+
Returns:
9+
int -- Importance of the script.
10+
"""
11+
return 1
12+
13+
14+
def run(site):
615
"""Get [keywords_in_first_words] first words of the content, and ensure that there is a keyword among them.
716
"""
8-
content = site.content.text.lower().split()[
17+
first_N_words = site.content_text.split()[
918
: site.settings.SEO_SETTINGS["keywords_in_first_words"]
1019
]
1120

1221
for keyword in site.keywords:
13-
if keyword in content:
22+
if keyword in first_N_words:
1423
return
1524

1625
site.problems.append(
Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,33 @@
1+
# Standard Library
2+
import importlib
3+
import sys
4+
5+
# Local application / specific library imports
6+
from . import * # noqa: F403,F401
7+
8+
19
def launch_checks(site):
    """Run every loaded check module, the most important ones first.

    A check module is any entry of sys.modules whose name contains
    "django-check-seo.checks." (launch_checks itself is excluded). Its
    importance() function gives its priority and its run(site) function does
    its test(s), then adds a dict in site.problems or site.warnings.

    Arguments:
        site {Site} -- A set of useful vars that can be used by the functions (including problems & warnings, two lists of dict).

    Raises:
        AttributeError -- If a matching module lacks importance() or run().
    """
    checks_prefix = "django-check-seo.checks."
    launcher_name = "django-check-seo.checks.launch_checks"

    modules_order = []

    # Iterate over a snapshot: sys.modules is a live dict and raises
    # RuntimeError if an import changes its size while it is being iterated.
    for module_name, module in list(sys.modules.items()):
        # Only keep modules in ...checks.*; None entries are placeholders
        # for blocked imports, not real modules.
        if module is None or checks_prefix not in module_name:
            continue
        if module_name == launcher_name:
            continue

        # The module is already loaded, so it is used directly instead of
        # being re-imported by name.
        modules_order.append((module, module.importance()))

    # sorted() is stable: checks sharing the same importance keep the order
    # in which they were found.
    ranked = sorted(modules_order, key=lambda entry: entry[1], reverse=True)
    for module, _importance in ranked:
        module.run(site)

django-check-seo/views.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,9 @@ def get_context_data(self, *args, **kwargs):
4343

4444
(context["problems"], context["warnings"]) = (site.problems, site.warnings)
4545

46-
context["parsehtml"] = r.text
4746
context["settings"] = json.dumps(settings.SEO_SETTINGS, indent=4)
48-
context["contenthtml"] = site.content
49-
context["content"] = site.content_text
47+
context["html"] = site.content
48+
context["text"] = site.content_text
5049

5150
return context
5251

@@ -73,20 +72,25 @@ def __init__(self, soup, full_url):
7372
self.soup = soup
7473

7574
# Get content of the page (exclude header/footer)
76-
self.content = self.soup.find("div", {"class": "container"})
75+
self.content = self.soup.select(".container")
76+
7777
if self.content is None:
7878
self.content = ""
7979

80-
# remove ul with nav class from content (<ul class="nav">, <ul class="navbar">, or <nav>)
81-
if self.content.find("ul", {"class": "nav"}):
82-
self.content.find("ul", {"class": "nav"}).extract()
83-
elif self.content.find("ul", {"class": "navbar"}):
84-
self.content.find("ul", {"class": "navbar"}).extract()
85-
elif self.content.find("nav"):
86-
self.content.find("nav").extract()
80+
for c in self.content:
81+
# remove ul with nav class from content (<ul class="nav">, <ul class="navbar">, or <nav>)
82+
if c.find("ul", {"class": "nav"}):
83+
c.find("ul", {"class": "nav"}).extract()
84+
elif c.find("ul", {"class": "navbar"}):
85+
c.find("ul", {"class": "navbar"}).extract()
86+
elif c.find("nav"):
87+
c.find("nav").extract()
88+
89+
# join the text nodes with a space separator so that adjacent tags do not merge into one word (without it, "<h1>Title</h1><br /><p>Content</p>" -> TitleContent)
90+
self.content_text = ""
91+
for c in self.content:
92+
self.content_text += c.get_text(separator=" ")
8793

88-
# get content without doublewords thx to separator ("<h1>Title</h1><br /><p>Content</p>" -> TitleContent)
89-
self.content_text = self.content.get_text(separator=" ")
9094
# strip multiple carriage return (with optional space) to only one
9195
self.content_text = re.sub(r"(\n( ?))+", "\n", self.content_text)
9296
# strip multiples spaces (>3) to only 2 (for title readability)

0 commit comments

Comments
 (0)