testsAndMisc/linux_configuration/scripts/utils/repo_to_study.sh

366 lines
11 KiB
Bash
Raw Normal View History

2026-01-07 17:03:07 +01:00
#!/usr/bin/env bash
#==============================================================================
# repo_to_study.sh - Complete pipeline: Repo → Analysis → Offline Docs → Study Materials
#
# Usage:
# repo_to_study.sh <repo_url_or_path> [output_dir]
#
# Examples:
# repo_to_study.sh https://github.com/user/repo
# repo_to_study.sh /path/to/local/repo
# repo_to_study.sh .
#
# Output:
# Creates study materials in ~/.local/share/study-materials/<repo_name>/
# (or in [output_dir] when it is given):
# - documentation_links.md (with offline doc paths)
# - anki_cards.txt (importable to Anki)
# - llm_anki_prompt.md (for generating more cards with AI)
# - analysis/ (raw analysis data copied from the analyzer results)
#==============================================================================
set -euo pipefail
# Script directory for finding other tools
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
# Helper scripts are expected to live next to this script
readonly ANALYZE_SCRIPT="$SCRIPT_DIR/analyze_repo.sh"
readonly STUDY_SCRIPT="$SCRIPT_DIR/generate_study_materials.sh"
readonly SETUP_DOCS_SCRIPT="$SCRIPT_DIR/setup_offline_docs.sh"
# Default output location (not in script dir, user's data dir)
readonly STUDY_MATERIALS_BASE="$HOME/.local/share/study-materials"
# Work directory: created with mktemp so the name is unpredictable and the
# directory is private to this run (a fixed /tmp/<name>_$$ path can be
# pre-created by another user). Assigned before `readonly` so that a mktemp
# failure is not masked and aborts the script under `set -e`.
WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/repo_study.XXXXXX")"
readonly WORK_DIR
# NOTE(review): OUTPUT_DIR is not referenced by the code visible in this file;
# it is kept (and left writable) in case another tool relies on it.
# shellcheck disable=SC2034 # OUTPUT_DIR set dynamically by parse_args
OUTPUT_DIR=""
# Colors (escape sequences are expanded by the print_* helpers at output time)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly BLUE='\033[0;34m'
readonly YELLOW='\033[1;33m'
readonly CYAN='\033[0;36m'
readonly BOLD='\033[1m'
readonly NC='\033[0m'
#==============================================================================
# Helper Functions (all print to stderr to not interfere with return values)
#==============================================================================
# Print a banner-style section header to stderr.
# Globals:   BOLD, CYAN, NC (read)
# Arguments: $1 - header text (backslash escapes are expanded, as with echo -e)
# Outputs:   blank line, rule, title, rule, blank line - all on stderr
print_header() {
  local rule='════════════════════════════════════════════════════════════'

  # %b expands the \033 colour escapes stored in the colour variables.
  printf '\n%b\n' "${BOLD}${CYAN}${rule}${NC}" >&2
  printf '%b\n' "${BOLD}${CYAN} $1${NC}" >&2
  printf '%b\n\n' "${BOLD}${CYAN}${rule}${NC}" >&2
}
# Print a pipeline step message (bold blue) to stderr.
# Globals:   BOLD, BLUE, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded)
print_step() {
  printf '%b\n' "${BOLD}${BLUE}$1${NC}" >&2
}
# Print a success message (green) to stderr.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded)
print_success() {
  # printf instead of echo -e: a message such as "-n" is printed verbatim
  # rather than being consumed as an echo option.
  printf '%b\n' "${GREEN}$1${NC}" >&2
}
# Print an error message (red) to stderr.
# Globals:   RED, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded)
print_error() {
  printf '%b\n' "${RED}$1${NC}" >&2
}
# Print an informational message (yellow) to stderr.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded)
print_info() {
  printf '%b\n' "${YELLOW}$1${NC}" >&2
}
# Remove the temporary work directory, if it exists.
# Globals: WORK_DIR (read)
# Registered below as the EXIT trap, so it runs on every exit path of the
# main shell. It must never fail and must never touch "/".
cleanup() {
  # ${WORK_DIR:-} keeps this safe under `set -u` even if the variable was
  # never assigned (e.g. the script died very early).
  local dir="${WORK_DIR:-}"

  if [ -n "$dir" ] && [ "$dir" != "/" ] && [ -d "$dir" ]; then
    rm -rf -- "$dir"
  fi
}
trap cleanup EXIT
# Print the help text to stdout and terminate the script.
# Arguments: none (uses $0 for the program name shown in the examples)
# Outputs:   help text on stdout
# Exits:     always, with status 0
usage() {
  local prog="${0##*/}"

  cat <<EOF
repo_to_study.sh - Generate study materials from any repository
USAGE:
${prog} <repo_url_or_path> [output_dir]
ARGUMENTS:
repo_url_or_path Git URL (https/ssh) or local path to repository
output_dir Optional: where to save results
Default: ~/.local/share/study-materials/<repo_name>/
EXAMPLES:
${prog} https://github.com/python/cpython
${prog} git@github.com:torvalds/linux.git
${prog} /home/user/my-project
${prog} . ~/notes/my_study_notes
OUTPUT FILES:
documentation_links.md - Markdown with offline documentation links
anki_cards.txt - Tab-separated file for Anki import
llm_anki_prompt.md - Prompt template for AI-generated cards
analysis/ - Raw analysis data (imports, keywords, functions)
EOF
  exit 0
}
#==============================================================================
# Check Dependencies
#==============================================================================
# Verify that the helper scripts and the basic command-line tools exist.
# Globals:   ANALYZE_SCRIPT, STUDY_SCRIPT (read)
# Outputs:   on failure, the list of missing items on stderr
# Exits:     with status 1 when anything is missing; returns 0 otherwise
check_dependencies() {
  local missing=()
  local cmd dep

  # The helper scripts are executed directly, so they must be executable
  # files - mere existence is not enough.
  if [ ! -x "$ANALYZE_SCRIPT" ]; then
    missing+=("analyze_repo.sh not found or not executable at $ANALYZE_SCRIPT")
  fi
  if [ ! -x "$STUDY_SCRIPT" ]; then
    missing+=("generate_study_materials.sh not found or not executable at $STUDY_SCRIPT")
  fi

  # Check for basic tools
  for cmd in git curl grep sed awk; do
    if ! command -v "$cmd" >/dev/null 2>&1; then
      missing+=("$cmd")
    fi
  done

  if [ "${#missing[@]}" -eq 0 ]; then
    return 0
  fi

  print_error "Missing dependencies:"
  for dep in "${missing[@]}"; do
    # Diagnostics belong on stderr, like every other message helper here.
    printf ' - %s\n' "$dep" >&2
  done
  exit 1
}
#==============================================================================
# Ensure Offline Docs are Available
#==============================================================================
# Make sure offline Python documentation is present, installing it when the
# setup script is available.
# Globals:   HOME, SETUP_DOCS_SCRIPT (read)
# Outputs:   informational messages via print_info
# Returns:   0 when the docs already exist or only a hint was printed;
#            otherwise the exit status of the setup script
ensure_offline_docs() {
  local docs_dir="$HOME/.local/share/offline-docs"

  # Docs already present: nothing to do.
  if [ -d "$docs_dir/python" ]; then
    return 0
  fi

  print_info "Offline docs not found. Setting up Python documentation..."
  if [ -x "$SETUP_DOCS_SCRIPT" ]; then
    "$SETUP_DOCS_SCRIPT" --python
  else
    print_info "Run setup_offline_docs.sh --all to enable offline documentation"
  fi
}
# Global to store repo name for cloned repos
REPO_NAME=""
#==============================================================================
# Get Repository
#==============================================================================
# Resolve the user's input to a local repository directory.
# Globals:   WORK_DIR (read), REPO_NAME (written - but lost by the caller when
#            this function is run inside a command substitution)
# Arguments: $1 - Git URL (http(s)://, ssh://, git:// or git@host:path) or a
#                 path to an existing local directory
# Outputs:   the absolute repository path on stdout (and nothing else);
#            progress and errors go to stderr
# Exits:     with status 1 when cloning fails or the input is not usable
get_repo() {
  local input="$1"
  local repo_dir=""
  local url_re='^((https?|ssh|git)://|git@)'

  if [[ $input =~ $url_re ]]; then
    # Remote repository: shallow-clone it into the work directory.
    print_step "Cloning repository..."
    # Extract repo name from URL
    REPO_NAME="$(basename -- "$input" .git)"
    repo_dir="$WORK_DIR/$REPO_NAME"
    mkdir -p -- "$WORK_DIR"
    # All git output is sent to stderr so stdout carries only the path.
    if ! git clone --depth 1 "$input" "$repo_dir" >&2; then
      print_error "Failed to clone repository"
      exit 1
    fi
    print_success "Cloned: $input"
  elif [ -d "$input" ]; then
    # Local path: convert to an absolute path. CDPATH is cleared so that cd
    # can neither jump elsewhere nor print the directory name to stdout.
    repo_dir="$(CDPATH='' cd -- "$input" && pwd)" || {
      print_error "Cannot access directory: $input"
      exit 1
    }
    REPO_NAME="$(basename -- "$repo_dir")"
    print_success "Using local repository: $repo_dir"
  else
    print_error "Invalid input: '$input' is not a valid URL or directory"
    exit 1
  fi

  printf '%s\n' "$repo_dir"
}
#==============================================================================
# Analyze Repository
#==============================================================================
# Run the analyzer on a repository and locate its results directory.
# Globals:   ANALYZE_SCRIPT, REPO_NAME (read)
# Arguments: $1 - path to the repository to analyze
# Outputs:   the results directory on stdout (and nothing else); analyzer
#            output and progress messages go to stderr
# Exits:     with status 1 when no usable results directory is found
analyze_repo() {
  local repo_path="$1"
  local repo_name="${REPO_NAME:-}"
  local analyzer_status=0
  local results_dir

  if [ -z "$repo_name" ]; then
    repo_name="$(basename -- "$repo_path")"
  fi

  print_step "Analyzing repository..."

  # Run the analyzer (results go to /tmp/repo_analysis/). A non-zero exit is
  # tolerated because partial results may still be usable, but it is reported
  # so that a fallback to results from an earlier run does not go unnoticed.
  "$ANALYZE_SCRIPT" "$repo_path" >&2 || analyzer_status=$?
  if [ "$analyzer_status" -ne 0 ]; then
    print_info "Analyzer exited with status $analyzer_status; looking for results anyway"
  fi

  # Find the results directory: per-repo name first, then the generic one.
  results_dir="/tmp/repo_analysis/results_${repo_name}"
  if [ ! -d "$results_dir" ]; then
    results_dir="/tmp/repo_analysis/results"
  fi

  # per_language/ existing implies that the results directory itself exists.
  if [ ! -d "$results_dir/per_language" ]; then
    print_error "Could not find analysis results at $results_dir"
    exit 1
  fi

  print_success "Analysis complete: $results_dir"
  printf '%s\n' "$results_dir"
}
#==============================================================================
# Generate Study Materials
#==============================================================================
# Run the study-material generator inside the analysis directory and copy
# its results to the output directory.
# Globals:   STUDY_SCRIPT (read)
# Arguments: $1 - analysis results directory (the generator runs with this
#                 as its working directory and "." as its argument)
#            $2 - destination directory (created if missing)
# Outputs:   up to five generator status lines on stdout; progress on stderr
# Returns:   1 when the analysis directory does not exist, 0 otherwise
generate_materials() {
  local analysis_dir="$1"
  local output_dir="$2"
  local generator_output=""
  local generator_status=0
  local item

  print_step "Generating study materials with offline documentation..."

  if [ ! -d "$analysis_dir" ]; then
    print_error "Analysis directory not found: $analysis_dir"
    return 1
  fi

  # Run the generator in a subshell so the caller's working directory is left
  # untouched; its stderr is intentionally discarded to keep the output tidy.
  generator_output="$(cd -- "$analysis_dir" >/dev/null && "$STUDY_SCRIPT" . 2>/dev/null)" ||
    generator_status=$?

  # Show only the generator's headline lines. "No match" is not an error.
  grep -E "^(Created|✓|Files created)" <<<"$generator_output" | head -5 || true

  if [ "$generator_status" -eq 0 ]; then
    print_success "Study materials generated"
  else
    # Best effort: the generator may have produced some files before failing.
    print_info "Study materials generator exited with status $generator_status; copying whatever was produced"
  fi

  # Create output directories and copy results
  mkdir -p -- "$output_dir/analysis"

  # Copy generated files
  for item in documentation_links.md anki_cards.txt llm_anki_prompt.md; do
    if [ -f "$analysis_dir/$item" ]; then
      cp -- "$analysis_dir/$item" "$output_dir/"
    fi
  done

  # Copy analysis data
  if [ -d "$analysis_dir/per_language" ]; then
    cp -r -- "$analysis_dir/per_language" "$output_dir/analysis/"
  fi
  for item in grep_imports.txt grep_keywords.txt grep_function_calls.txt; do
    if [ -f "$analysis_dir/$item" ]; then
      cp -- "$analysis_dir/$item" "$output_dir/analysis/"
    fi
  done

  print_success "Files saved to: $output_dir"
}
#==============================================================================
# Show Summary
#==============================================================================
# Print a human-readable summary of the generated study materials.
# Globals:   BOLD, GREEN, CYAN, NC (read)
# Arguments: $1 - output directory that holds the generated files
# Outputs:   the summary on stdout (the banner goes through print_header)
show_summary() {
  local output_dir="$1"
  local links_file="$output_dir/documentation_links.md"
  local cards_file="$output_dir/anki_cards.txt"
  local doc_lines card_count

  print_header "Study Materials Ready!"
  # The path is passed via %s so backslashes in it are printed verbatim.
  printf '%b %s\n' "${BOLD}Output directory:${NC}" "$output_dir"
  printf '\n'
  printf '%b\n' "${BOLD}Generated files:${NC}"

  if [ -f "$links_file" ]; then
    doc_lines="$(wc -l <"$links_file")"
    printf '%b\n' " 📚 ${GREEN}documentation_links.md${NC} ($doc_lines lines)"
    printf '%s\n' " Contains links to OFFLINE documentation"
  fi

  if [ -f "$cards_file" ]; then
    # grep -c already prints "0" (and exits 1) when nothing matches, so the
    # fallback must not print a second "0"; default only when grep printed
    # nothing at all (e.g. unreadable file).
    card_count="$(grep -c '^[[:alnum:]_]' "$cards_file" 2>/dev/null)" || true
    card_count="${card_count:-0}"
    printf '%b\n' " 🎴 ${GREEN}anki_cards.txt${NC} (~$card_count cards)"
    printf '%s\n' " Import to Anki: File → Import → Tab separated"
  fi

  if [ -f "$output_dir/llm_anki_prompt.md" ]; then
    printf '%b\n' " 🤖 ${GREEN}llm_anki_prompt.md${NC}"
    printf '%s\n' " Use with ChatGPT/Claude to generate more cards"
  fi

  if [ -d "$output_dir/analysis" ]; then
    printf '%b\n' " 📊 ${GREEN}analysis/${NC}"
    printf '%s\n' " Raw analysis data (imports, keywords, functions per language)"
  fi

  printf '\n'
  printf '%b\n' "${BOLD}Quick preview of imports with offline docs:${NC}"
  if [ -f "$links_file" ]; then
    # Keep only table rows that start with "| `". In a basic regex a bare "|"
    # is literal, whereas "\|" is GNU alternation and would match every line.
    # "|| true": an empty preview must not abort the script under
    # errexit/pipefail.
    grep -A20 "import/from" "$links_file" 2>/dev/null |
      grep '^| `' |
      head -5 |
      sed 's/|/│/g' || true
  fi

  printf '\n'
  printf '%b\n' "${BOLD}Next steps:${NC}"
  printf '%s\n' " 1. Open documentation_links.md to browse offline docs"
  printf '%s\n' " 2. Import anki_cards.txt into Anki for spaced repetition"
  printf '%s\n' " 3. Use llm_anki_prompt.md to generate more targeted cards"
  printf '\n'
  printf '%b\n' "${CYAN}To view a doc:${NC} xdg-open 'file:///path/from/documentation_links.md'"
}
#==============================================================================
# Main
#==============================================================================
# Run the whole pipeline: fetch the repository, analyze it, generate the
# study materials and print a summary.
# Globals:   WORK_DIR, STUDY_MATERIALS_BASE, BOLD, NC (read);
#            REPO_NAME (read and written)
# Arguments: $1 - repository URL or local path
#            $2 - optional output directory; relative paths are resolved
#                 against the current working directory
main() {
  # Handle help (usage prints the help text and exits)
  if [ $# -lt 1 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
    usage
  fi

  local input="$1"
  local output_dir="${2:-}" # Default is filled in once the repo name is known
  local repo_path analysis_dir

  print_header "Repo → Study Materials Pipeline"

  # Setup
  mkdir -p -- "$WORK_DIR"
  check_dependencies
  ensure_offline_docs

  # Step 1: Get repository
  print_header "Step 1/3: Getting Repository"
  repo_path="$(get_repo "$input")"
  # get_repo runs in a subshell (command substitution), so the REPO_NAME it
  # assigns is lost; derive the name from the returned path instead.
  if [ -z "${REPO_NAME:-}" ]; then
    REPO_NAME="$(basename -- "$repo_path")"
  fi

  # Pick the output directory: default location, or the user's choice made
  # absolute.
  case "$output_dir" in
    "") output_dir="$STUDY_MATERIALS_BASE/$REPO_NAME" ;;
    /*) ;; # already absolute
    *) output_dir="$(pwd)/$output_dir" ;;
  esac

  # %s keeps user-supplied paths verbatim (no backslash-escape expansion).
  printf '%b %s\n' "${BOLD}Input:${NC}" "$input" >&2
  printf '%b %s\n' "${BOLD}Output:${NC}" "$output_dir" >&2
  printf '\n' >&2

  # Step 2: Analyze
  print_header "Step 2/3: Analyzing Code"
  analysis_dir="$(analyze_repo "$repo_path")"

  # Step 3: Generate materials
  print_header "Step 3/3: Generating Study Materials"
  generate_materials "$analysis_dir" "$output_dir"

  # Show results
  show_summary "$output_dir"
}
# Entry point: forward all command-line arguments to main.
main "$@"