# File: skill_analyzer.py
import re
import json
from collections import Counter
# Map each skill's display name to the regex source that detects it.
# Word boundaries (\b) prevent false positives such as matching
# "go" inside "google" or "java" inside "javascript".
# The patterns are written in lowercase; count_skills compiles them with
# re.IGNORECASE, so matching is case-insensitive.
SKILL_PATTERNS = {
    "Python": r"\bpython\b",
    "SQL": r"\bsql\b",
    # NOTE(review): the short alias "js" also matches a standalone "js"
    # token inside names such as "React.js".
    "JavaScript": r"\b(javascript|js)\b",
    "TypeScript": r"\b(typescript|ts)\b",
    # The trailing \b already rejects "javascript" (the next character,
    # "s", is a word character), so the (?!script) lookahead is redundant
    # but harmless.
    "Java": r"\bjava\b(?!script)",
    # NOTE(review): "go" also matches the ordinary English word "go"
    # (e.g. "ready to go"), which can inflate this skill's count.
    "Go": r"\b(golang|go)\b",
    "Rust": r"\brust\b",
    # No \b here: "+" is not a word character, so a trailing \b would only
    # match when a word character follows "c++". Without a leading
    # boundary, this also matches "c++" preceded by other letters.
    "C++": r"c\+\+",
    "AWS": r"\baws\b",
    "Docker": r"\bdocker\b",
    "Kubernetes": r"\b(kubernetes|k8s)\b",
    "React": r"\breact\b",
    # NOTE(review): the bare "ml" alias matches any standalone "ml" token.
    "Machine Learning": r"\b(machine learning|ml)\b",
    "Pandas": r"\bpandas\b",
}
def load_jobs(path):
    """Read the job postings stored as JSON at ``path``.

    The file is opened as UTF-8 text and parsed with ``json.load``; the
    parsed value is returned unchanged. The rest of this module treats it
    as a list of ``{title, description}`` objects.
    """
    with open(path, mode="r", encoding="utf-8") as handle:
        postings = json.load(handle)
    return postings
def count_skills(jobs, patterns=None):
    """Count how many postings mention each skill.

    Args:
        jobs: Iterable of posting dicts. The ``title`` and ``description``
            values are searched; a missing key is treated as empty text.
        patterns: Optional mapping of skill name to regex source string.
            Defaults to the module-level ``SKILL_PATTERNS``.

    Returns:
        A ``Counter`` mapping skill name to the number of postings whose
        text matches that skill's pattern (case-insensitively). A posting
        adds at most one to a skill, however often the skill is mentioned.
    """
    if patterns is None:
        patterns = SKILL_PATTERNS
    # Compile once up front so the per-posting loop only runs searches.
    compiled = {
        skill: re.compile(source, re.IGNORECASE)
        for skill, source in patterns.items()
    }
    counts = Counter()
    for job in jobs:
        text = f"{job.get('title', '')} {job.get('description', '')}"
        # Each skill appears exactly once in `compiled`, so a single search
        # per skill already guarantees one count per posting; no separate
        # "seen" bookkeeping is needed.
        counts.update(
            skill for skill, pattern in compiled.items() if pattern.search(text)
        )
    return counts
def rank_skills(counts, total_jobs):
    """Rank skills by posting count and attach each one's share of postings.

    Args:
        counts: ``Counter`` mapping skill name to number of postings.
        total_jobs: Total number of postings analysed; used as the
            denominator for the percentage.

    Returns:
        A list of ``(skill, count, percentage)`` tuples ordered from most
        to least frequent, with the percentage rounded to one decimal
        place. When ``total_jobs`` is not positive there is no meaningful
        denominator, so the percentage is reported as ``0.0`` instead of
        raising ``ZeroDivisionError``.
    """
    if total_jobs <= 0:
        return [(skill, count, 0.0) for skill, count in counts.most_common()]
    return [
        (skill, count, round((count / total_jobs) * 100, 1))
        for skill, count in counts.most_common()
    ]
if __name__ == "__main__":
    # Script entry point: analyse the sample postings and print a table of
    # skills ordered from most to least frequently mentioned.
    postings = load_jobs("sample_jobs.json")
    table = rank_skills(count_skills(postings), len(postings))

    print(f"Analyzed {len(postings)} job postings\n")
    print(f"{'Skill':<18}{'Postings':<10}{'% of jobs'}")
    print("-" * 38)
    for row in table:
        name, hits, share = row
        print(f"{name:<18}{hits:<10}{share}%")
# File: make_chart.py
import matplotlib

# Select the non-interactive Agg backend before pyplot is imported; this
# script only saves an image file and never opens a window.
matplotlib.use("Agg")
import matplotlib.pyplot as plt

from skill_analyzer import load_jobs, count_skills, rank_skills

# Rank every skill found in the sample postings.
jobs = load_jobs("sample_jobs.json")
skill_counts = count_skills(jobs)
ranked = rank_skills(skill_counts, len(jobs))

# Keep only the ten highest-ranked skills so the chart stays readable.
top = ranked[:10]
skills = [name for name, _count, _pct in top]
percentages = [pct for _name, _count, pct in top]

plt.figure(figsize=(10, 6))
# Pass the data in reverse order so the highest value sits at the top of
# the horizontal bar chart.
plt.barh(list(reversed(skills)), list(reversed(percentages)), color="#306998")
plt.xlabel("Percentage of job postings (%)")
plt.title("Top 10 In-Demand Tech Skills")
plt.tight_layout()
plt.savefig("skills_chart.png", dpi=120)
print("Chart saved to skills_chart.png")