Code for How to Analyze the Most In-Demand Tech Skills from Job Postings in Python Tutorial

skill_analyzer.py

import re
import json
from collections import Counter

# Map each skill's display name to the regex that detects it. The patterns
# are meant to be compiled with re.IGNORECASE (count_skills does this).
#
# Word boundaries (\b) prevent substring false positives such as matching
# "go" inside "google" or "java" inside "javascript".
SKILL_PATTERNS = {
    "Python": r"\bpython\b",
    "SQL": r"\bsql\b",
    "JavaScript": r"\b(javascript|js)\b",
    "TypeScript": r"\b(typescript|ts)\b",
    # The trailing \b already rejects "javascript" (the "s" is a word
    # character), so no extra lookahead is needed.
    "Java": r"\bjava\b",
    # "go" is also an everyday English verb ("go above and beyond"), so the
    # bare word only counts when written as capitalised "Go": (?-i:...)
    # switches case-insensitivity off for that alternative only. The
    # lookahead skips the business phrase "Go-to-market". "golang" is
    # unambiguous and stays case-insensitive.
    "Go": r"\bgolang\b|(?-i:\bGo\b)(?!-to\b)",
    "Rust": r"\brust\b",
    # No \b here: "+" is not a word character, so a trailing \b would demand
    # a letter or digit right after "c++" and miss text like "C++ developer".
    "C++": r"c\+\+",
    "AWS": r"\baws\b",
    "Docker": r"\bdocker\b",
    "Kubernetes": r"\b(kubernetes|k8s)\b",
    "React": r"\breact\b",
    "Machine Learning": r"\b(machine learning|ml)\b",
    "Pandas": r"\bpandas\b",
}


def load_jobs(path):
    """Read the UTF-8 JSON file at ``path`` and return its decoded content.

    The file is expected to hold a list of postings, each an object with
    "title" and "description" keys.
    """
    with open(path, "r", encoding="utf-8") as handle:
        raw = handle.read()
    return json.loads(raw)

def count_skills(jobs, patterns=None):
    """Return a Counter of how many postings mention each skill.

    Args:
        jobs: Iterable of dicts. The "title" and "description" values of
            each one are searched; a missing or null field is treated as
            empty text.
        patterns: Optional mapping of skill name -> regex string. When
            omitted, the module-level SKILL_PATTERNS table is used.

    Returns:
        A Counter mapping skill name to the number of postings whose text
        matches that skill's pattern (case-insensitively). A posting counts
        at most once per skill; skills that never match are absent.
    """
    if patterns is None:
        patterns = SKILL_PATTERNS
    compiled = {skill: re.compile(pat, re.IGNORECASE)
                for skill, pat in patterns.items()}

    counts = Counter()
    for job in jobs:
        # "or ''" keeps a JSON null from being searched as the text "None".
        text = f"{job.get('title') or ''} {job.get('description') or ''}"
        # Each skill appears once in `compiled`, so one search per skill
        # already guarantees a posting is counted at most once per skill.
        counts.update(skill for skill, pattern in compiled.items()
                      if pattern.search(text))
    return counts


def rank_skills(counts, total_jobs):
    """Convert a skill Counter into ranked (skill, count, percentage) rows.

    Rows follow ``counts.most_common()`` order (highest count first). The
    percentage is ``count / total_jobs * 100`` rounded to one decimal place.
    """
    return [
        (name, hits, round((hits / total_jobs) * 100, 1))
        for name, hits in counts.most_common()
    ]


if __name__ == "__main__":
    # Script entry point: analyse the sample postings and print a ranking
    # table with one row per skill that was found.
    jobs = load_jobs("sample_jobs.json")
    ranked = rank_skills(count_skills(jobs), len(jobs))

    # Summary line, column headers, then a separator rule.
    print(f"Analyzed {len(jobs)} job postings\n")
    print(f"{'Skill':<18}{'Postings':<10}{'% of jobs'}")
    print("-" * 38)

    for row in ranked:
        skill, count, pct = row
        print(f"{skill:<18}{count:<10}{pct}%")

make_chart.py

import matplotlib
matplotlib.use("Agg")  # non-interactive backend, good for saving files
import matplotlib.pyplot as plt
from skill_analyzer import load_jobs, count_skills, rank_skills

# Rank every skill by the share of sample postings that mention it.
jobs = load_jobs("sample_jobs.json")
ranked = rank_skills(count_skills(jobs), len(jobs))

# Keep only the ten highest-ranked rows so the chart stays readable.
# Each row is a (skill, count, percentage) tuple.
top = ranked[:10]
skills = [name for name, _count, _pct in top]
percentages = [pct for _name, _count, pct in top]

plt.figure(figsize=(10, 6))
# Plot in reverse rank order so the highest value sits at the top of the chart.
plt.barh(
    list(reversed(skills)),
    list(reversed(percentages)),
    color="#306998",
)
plt.xlabel("Percentage of job postings (%)")
plt.title("Top 10 In-Demand Tech Skills")
plt.tight_layout()
plt.savefig("skills_chart.png", dpi=120)
print("Chart saved to skills_chart.png")