diff --git a/.gitignore b/.gitignore index df17d6e..2d09d8b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,15 @@ scripts/features/.nextcloud_raspberry.conf scripts/features/.raspberry_pi.conf .nextcloud_raspberry.conf .raspberry_pi.conf + +# Generated study materials (repo_to_study.sh output) +study_materials/ +**/study_materials/ +documentation_links.md +anki_cards.txt +llm_anki_prompt.md + +# Repo analysis temp files +/tmp/repo_analysis/ +*.cscope.out* +tags \ No newline at end of file diff --git a/scripts/digital_wellbeing/pacman/pacman_blocked_keywords.txt b/scripts/digital_wellbeing/pacman/pacman_blocked_keywords.txt index 8831ca4..472cf6b 100644 --- a/scripts/digital_wellbeing/pacman/pacman_blocked_keywords.txt +++ b/scripts/digital_wellbeing/pacman/pacman_blocked_keywords.txt @@ -53,3 +53,4 @@ netsurf amfora tartube youtube +virtualbox \ No newline at end of file diff --git a/scripts/fixes/fix_anki.sh b/scripts/fixes/fix_anki.sh new file mode 100755 index 0000000..fa3cc49 --- /dev/null +++ b/scripts/fixes/fix_anki.sh @@ -0,0 +1,224 @@ +#!/usr/bin/env bash + +# Fix Anki startup issues caused by Python version mismatch or aqt namespace conflict +# +# Common causes addressed: +# - anki-git built for older Python version (e.g., 3.13) while system runs newer (e.g., 3.14) +# - python-aqtinstall package conflicts with Anki's aqt module (same namespace) +# +# Usage: +# ./fix_anki.sh # Auto-fix (rebuild anki-git) +# ./fix_anki.sh --check # Only check for issues, don't fix + +set -euo pipefail + +SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" +# shellcheck source=../lib/common.sh +source "$SCRIPT_DIR/../lib/common.sh" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +CHECK_ONLY=false + +usage() { + cat </dev/null; then + echo "anki-git" + elif pacman -Qi anki &>/dev/null; then + echo "anki" + elif pacman -Qi anki-bin &>/dev/null; then + echo "anki-bin" + else + echo "" + fi +} + 
# Print the running interpreter's version as "MAJOR.MINOR" (e.g. "3.14").
get_system_python_version() {
  python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')"
}

# Print the "MAJOR.MINOR" Python version a package's files were installed for.
# Arguments: $1 - name of an installed pacman package
# Outputs:   the version taken from the first /usr/lib/pythonX.Y path the
#            package owns, or an empty line when it owns no such path
get_anki_python_version() {
  local anki_pkg="$1"
  local anki_path
  # "No match" is an expected outcome (the caller prints "unknown"), so the
  # pipeline must not be fatal under "set -euo pipefail".
  anki_path=$(pacman -Ql "$anki_pkg" 2>/dev/null | grep -oP '/usr/lib/python\K[0-9]+\.[0-9]+' | head -1) || true
  echo "$anki_path"
}

# Report which project the installed "aqt" module belongs to.
# Arguments: $1 - system Python version ("MAJOR.MINOR")
# Outputs:   one of "aqtinstall", "anki", "unknown" (module present but
#            unrecognised) or "none" (no aqt/__init__.py for that version)
check_aqt_conflict() {
  local sys_python="$1"
  local aqt_path="/usr/lib/python${sys_python}/site-packages/aqt/__init__.py"

  if [[ -f "$aqt_path" ]]; then
    if grep -q "aqtinstall" "$aqt_path" 2>/dev/null; then
      echo "aqtinstall"
    elif grep -q "anki" "$aqt_path" 2>/dev/null; then
      echo "anki"
    else
      echo "unknown"
    fi
  else
    echo "none"
  fi
}

# Entry point: diagnose the Anki installation and, unless --check was given,
# try to repair it.
# Globals:   CHECK_ONLY (written while parsing arguments, then read)
# Arguments: --check      only report problems (exit 1 if any were found)
#            -h | --help  print usage and exit
# Returns:   exits 0 when Anki imports cleanly, 1 on any detected or
#            unrepaired problem
# NOTE(review): log_info/log_warn/log_error/log_success are not defined in this
# file; presumably they come from the sourced lib/common.sh - confirm.
main() {
  # Parse arguments
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --check)
        CHECK_ONLY=true
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        usage
        exit 1
        ;;
    esac
  done

  log_info "Checking Anki installation..."

  # Check which Anki package is installed
  local anki_pkg
  anki_pkg=$(check_anki_installed)
  if [[ -z "$anki_pkg" ]]; then
    log_error "Anki is not installed"
    exit 1
  fi
  log_info "Found Anki package: $anki_pkg"

  # Get Python versions
  local sys_python anki_python
  sys_python=$(get_system_python_version)
  anki_python=$(get_anki_python_version "$anki_pkg")

  log_info "System Python version: $sys_python"
  log_info "Anki built for Python: ${anki_python:-unknown}"

  local issues_found=false

  # Check for Python version mismatch
  if [[ -n "$anki_python" && "$sys_python" != "$anki_python" ]]; then
    log_warn "Python version mismatch detected!"
    log_warn " Anki was built for Python $anki_python but system runs Python $sys_python"
    issues_found=true
  fi

  # Check for aqt namespace conflict
  local aqt_owner
  aqt_owner=$(check_aqt_conflict "$sys_python")
  case "$aqt_owner" in
    aqtinstall)
      log_warn "aqt namespace conflict detected!"
      log_warn " python-aqtinstall owns /usr/lib/python${sys_python}/site-packages/aqt/"
      log_warn " This conflicts with Anki's aqt module"
      issues_found=true
      ;;
    anki)
      log_success "aqt module belongs to Anki (correct)"
      ;;
    none)
      if [[ "$sys_python" != "$anki_python" ]]; then
        log_warn "No aqt module found for Python $sys_python"
      fi
      ;;
    *)
      log_warn "Unknown aqt module owner"
      ;;
  esac

  # Test if Anki actually works
  log_info "Testing Anki startup..."
  if python -c "from aqt import run" 2>/dev/null; then
    log_success "Anki imports work correctly"
    if [[ "$issues_found" == "false" ]]; then
      log_success "No issues found with Anki installation"
      exit 0
    fi
  else
    log_error "Anki import test failed"
    issues_found=true
  fi

  if [[ "$CHECK_ONLY" == "true" ]]; then
    if [[ "$issues_found" == "true" ]]; then
      echo ""
      log_info "Issues detected. Run without --check to fix."
      exit 1
    fi
    exit 0
  fi

  # Apply fixes
  echo ""
  log_info "Applying fixes..."

  # Check if python-aqtinstall is installed and remove it if nothing depends on it
  if pacman -Qi python-aqtinstall &>/dev/null; then
    local required_by
    # Force the C locale: the "Required By" label and the "None" value are
    # translated under other locales, which would make the match below fail.
    required_by=$(LC_ALL=C pacman -Qi python-aqtinstall | grep "Required By" | cut -d: -f2 | xargs)
    if [[ "$required_by" == "None" ]]; then
      log_info "Removing python-aqtinstall (conflicts with Anki)..."
      sudo pacman -R --noconfirm python-aqtinstall
    else
      log_warn "python-aqtinstall is required by: $required_by"
      log_warn "Cannot remove automatically. You may need to resolve this manually."
    fi
  fi

  # Rebuild anki package
  if [[ "$anki_pkg" == "anki-git" ]]; then
    # Pick whichever AUR helper exists instead of assuming yay; a missing
    # helper would otherwise abort with a bare "command not found".
    local aur_helper
    if command -v yay &>/dev/null; then
      aur_helper="yay"
    elif command -v paru &>/dev/null; then
      aur_helper="paru"
    else
      log_error "No AUR helper (yay or paru) found; cannot rebuild anki-git automatically"
      exit 1
    fi
    log_info "Rebuilding anki-git for Python $sys_python..."
    "$aur_helper" -S anki-git --rebuild --noconfirm
  elif [[ "$anki_pkg" == "anki" ]]; then
    log_info "Reinstalling anki..."
    sudo pacman -S anki --noconfirm
  else
    log_warn "Package $anki_pkg may need manual rebuild"
  fi

  # Verify fix
  echo ""
  log_info "Verifying fix..."
  if python -c "from aqt import run" 2>/dev/null; then
    log_success "Anki is now working!"
    echo ""
    echo "You can start Anki with: anki"
  else
    log_error "Fix may not have worked. Please check manually."
    exit 1
  fi
}

main "$@"
diff --git a/scripts/fixes/stremio b/scripts/fixes/stremio
new file mode 160000
index 0000000..4c3c999
--- /dev/null
+++ b/scripts/fixes/stremio
@@ -0,0 +1 @@
+Subproject commit 4c3c9996956221f0cae49f69e0597e33aee33ee1
diff --git a/scripts/utils/analyze_repo.sh b/scripts/utils/analyze_repo.sh
new file mode 100755
index 0000000..48a3628
--- /dev/null
+++ b/scripts/utils/analyze_repo.sh
@@ -0,0 +1,866 @@
#!/bin/bash
# Analyze a git repository for most-used keywords, functions, etc.
# Usage: ./analyze_repo.sh [repo_url_or_local_path] [output_dir] [--no-ignore]
#
# Examples:
# ./analyze_repo.sh https://github.com/torvalds/linux # Clone from URL
# ./analyze_repo.sh /path/to/local/repo # Use local directory
# ./analyze_repo.sh . # Analyze current directory
# ./analyze_repo.sh . /tmp/out --no-ignore # Include node_modules, etc.
set -e

# Parse arguments: the first two positional words are the input and the output
# directory; --no-ignore may appear anywhere.
INPUT=""
WORK_DIR=""
RESPECT_GITIGNORE=true

for arg in "$@"; do
  case "$arg" in
    --no-ignore)
      RESPECT_GITIGNORE=false
      ;;
    *)
      if [ -z "$INPUT" ]; then
        INPUT="$arg"
      elif [ -z "$WORK_DIR" ]; then
        WORK_DIR="$arg"
      fi
      ;;
  esac
done

INPUT="${INPUT:-https://github.com/torvalds/linux}"
WORK_DIR="${WORK_DIR:-/tmp/repo_analysis}"
TOP_N=50 # Number of top results to show

# The script later cd's into the repository, so a relative output directory
# must be anchored to the invocation directory now; otherwise every later
# "$RESULTS_DIR/..." (and the second cd into a cloned repo) resolves wrongly.
case "$WORK_DIR" in
  /*) ;;
  *) WORK_DIR="$PWD/$WORK_DIR" ;;
esac

# Directories to exclude (unless --no-ignore is used)
EXCLUDE_DIRS="node_modules|\.git|vendor|\.venv|venv|__pycache__|\.cache|build|dist|\.next|\.nuxt|target|\.tox|\.eggs"

# Succeeds when $1 looks like a remote repository location
# (http://, https://, git@ or ssh:// prefix).
is_url() {
  [[ "$1" =~ ^https?:// ]] || [[ "$1" =~ ^git@ ]] || [[ "$1" =~ ^ssh:// ]]
}

IS_LOCAL=false
if is_url "$INPUT"; then
  REPO_URL="$INPUT"
  REPO_NAME=$(basename "$REPO_URL" .git)
  REPO_DIR="$WORK_DIR/$REPO_NAME"
else
  # Local path - resolve to absolute path
  IS_LOCAL=true
  if [ -d "$INPUT" ]; then
    REPO_DIR=$(cd "$INPUT" && pwd)
    REPO_NAME=$(basename "$REPO_DIR")
  else
    echo "Error: '$INPUT' is not a valid directory or URL" >&2
    exit 1
  fi
fi

RESULTS_DIR="$WORK_DIR/results_${REPO_NAME}"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Print a boxed section title.
# Arguments: $1 - title text
print_header() {
  echo ""
  echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
  echo -e "${GREEN} $1${NC}"
  echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
  echo ""
}

# Print a minor section title.
# Arguments: $1 - title text
print_subheader() {
  echo ""
  echo -e "${YELLOW}--- $1 ---${NC}"
  echo ""
}

# Succeeds when the current directory is inside a git work tree.
is_git_repo() {
  git rev-parse --is-inside-work-tree &>/dev/null
}

# List files under the current directory whose names match any given glob.
# Usage: find_files "*.c" or find_files "*.py" "*.pyx"
# Globals:   RESPECT_GITIGNORE, EXCLUDE_DIRS (read)
# Outputs:   one path per line on stdout
# Filtering: in a git work tree (and unless --no-ignore) git's own ignore
#            rules apply; outside git the EXCLUDE_DIRS list is used instead;
#            with --no-ignore nothing is filtered.
find_files() {
  local patterns=("$@")

  if [ "$RESPECT_GITIGNORE" = true ] && is_git_repo; then
    # Tracked files plus untracked files that are not ignored.
    {
      git ls-files -- "${patterns[@]}" 2>/dev/null
      git ls-files --others --exclude-standard -- "${patterns[@]}" 2>/dev/null
    } | sort -u
    return
  fi

  # Build "-name P1 -o -name P2 ..." once for both find-based paths.
  local find_args=()
  local pat
  for pat in "${patterns[@]}"; do
    if [ "${#find_args[@]}" -gt 0 ]; then
      find_args+=(-o)
    fi
    find_args+=(-name "$pat")
  done

  if [ "$RESPECT_GITIGNORE" = true ]; then
    # Not a git repo - fall back to manual exclusion
    find . -type f \( "${find_args[@]}" \) 2>/dev/null | grep -Ev "/($EXCLUDE_DIRS)/"
  else
    # No filtering - find all files
    find . -type f \( "${find_args[@]}" \) 2>/dev/null
  fi
}

# Count files matching the given globs (same filtering as find_files).
count_files() {
  find_files "$@" | wc -l
}

#==============================================================================
# STEP 0: Install Missing Tools
#==============================================================================
# Detect missing analysis tools and install them with the first package
# manager found (pacman, apt-get, dnf, brew). Exits 1 when tools are missing
# and no known package manager is available.
install_missing_tools() {
  local MISSING_TOOLS=()
  local MISSING_AUR=()
  # Scratch state stays local so it cannot leak into the rest of the script.
  local AUR_HELPER TEMP_DIR tool aur_tool
  local APT_PACKAGES=()
  local ALL_TOOLS=()

  # Check for required tools
  command -v git &> /dev/null || MISSING_TOOLS+=("git")
  command -v ctags &> /dev/null || MISSING_TOOLS+=("ctags")
  command -v cscope &> /dev/null || MISSING_TOOLS+=("cscope")
  command -v clang &> /dev/null || MISSING_TOOLS+=("clang")
  command -v ugrep &> /dev/null || MISSING_TOOLS+=("ugrep")

  # Check for AUR tools
  command -v tokei &> /dev/null || MISSING_AUR+=("tokei")
  command -v scc &> /dev/null || MISSING_AUR+=("scc")

  # Check for Rust 'counts' tool (install via cargo if missing)
  if ! command -v counts &> /dev/null; then
    if command -v cargo &> /dev/null; then
      echo "Installing 'counts' via cargo (fast word counter)..."
      cargo install counts 2>/dev/null || echo "Warning: counts install failed, will use Python fallback"
    fi
  fi

  # If nothing is missing, return
  if [ ${#MISSING_TOOLS[@]} -eq 0 ] && [ ${#MISSING_AUR[@]} -eq 0 ]; then
    echo -e "${GREEN}All required tools are installed.${NC}"
    return 0
  fi

  echo -e "${YELLOW}Missing tools detected. Installing...${NC}"

  # Detect package manager
  if command -v pacman &> /dev/null; then
    # Arch Linux
    if [ ${#MISSING_TOOLS[@]} -gt 0 ]; then
      echo "Installing from official repos: ${MISSING_TOOLS[*]}"
      sudo pacman -S --needed --noconfirm "${MISSING_TOOLS[@]}"
    fi

    if [ ${#MISSING_AUR[@]} -gt 0 ]; then
      # Find or install AUR helper
      if command -v yay &> /dev/null; then
        AUR_HELPER="yay"
      elif command -v paru &> /dev/null; then
        AUR_HELPER="paru"
      else
        echo "No AUR helper found. Installing yay..."
        sudo pacman -S --needed --noconfirm base-devel git
        TEMP_DIR=$(mktemp -d)
        git clone https://aur.archlinux.org/yay.git "$TEMP_DIR/yay"
        (cd "$TEMP_DIR/yay" && makepkg -si --noconfirm)
        rm -rf "$TEMP_DIR"
        AUR_HELPER="yay"
      fi

      echo "Installing from AUR: ${MISSING_AUR[*]}"
      "$AUR_HELPER" -S --needed --noconfirm "${MISSING_AUR[@]}"
    fi

  elif command -v apt-get &> /dev/null; then
    # Debian/Ubuntu
    echo "Installing tools via apt..."
    sudo apt-get update

    # Map tool names to package names
    for tool in "${MISSING_TOOLS[@]}"; do
      case "$tool" in
        ctags) APT_PACKAGES+=("universal-ctags") ;;
        ugrep) APT_PACKAGES+=("ugrep") ;;
        *) APT_PACKAGES+=("$tool") ;;
      esac
    done

    if [ ${#APT_PACKAGES[@]} -gt 0 ]; then
      sudo apt-get install -y "${APT_PACKAGES[@]}"
    fi

    # Install tokei/scc via cargo or snap
    for aur_tool in "${MISSING_AUR[@]}"; do
      if command -v cargo &> /dev/null; then
        echo "Installing $aur_tool via cargo..."
        cargo install "$aur_tool"
      elif command -v snap &> /dev/null; then
        echo "Installing $aur_tool via snap..."
        sudo snap install "$aur_tool"
      else
        echo -e "${YELLOW}Warning: Cannot install $aur_tool. Install cargo or snap first.${NC}"
      fi
    done

  elif command -v dnf &> /dev/null; then
    # Fedora
    echo "Installing tools via dnf..."
    sudo dnf install -y "${MISSING_TOOLS[@]}" "${MISSING_AUR[@]}" 2>/dev/null || {
      # tokei/scc might need cargo
      for aur_tool in "${MISSING_AUR[@]}"; do
        if command -v cargo &> /dev/null; then
          cargo install "$aur_tool"
        fi
      done
    }

  elif command -v brew &> /dev/null; then
    # macOS with Homebrew
    echo "Installing tools via brew..."
    ALL_TOOLS=("${MISSING_TOOLS[@]}" "${MISSING_AUR[@]}")
    brew install "${ALL_TOOLS[@]}"

  else
    echo -e "${RED}Unknown package manager. Please install these tools manually:${NC}" >&2
    echo " Official: ${MISSING_TOOLS[*]}" >&2
    echo " Additional: ${MISSING_AUR[*]}" >&2
    exit 1
  fi

  echo -e "${GREEN}Tool installation complete.${NC}"
}

print_header "STEP 0: Checking/Installing Required Tools"
install_missing_tools

# Create directories
mkdir -p "$WORK_DIR" "$RESULTS_DIR"

#==============================================================================
# STEP 1: Clone or Use Local Repository
#==============================================================================
print_header "STEP 1: Repository Setup"

if [ "$IS_LOCAL" = true ]; then
  echo "Using local repository: $REPO_DIR"
  if [ ! -d "$REPO_DIR" ]; then
    echo "Error: Directory does not exist: $REPO_DIR" >&2
    exit 1
  fi
else
  # Remote URL - clone it
  if [ -d "$REPO_DIR" ]; then
    echo "Repository already exists at $REPO_DIR"
    echo "Updating..."
    cd "$REPO_DIR"
    git pull --depth 1 2>/dev/null || echo "Update skipped (shallow clone)"
  else
    echo "Cloning $REPO_URL (shallow clone for speed)..."
    git clone --depth 1 "$REPO_URL" "$REPO_DIR"
  fi
fi

# Everything below runs relative to the repository root.
cd "$REPO_DIR"
echo "Repository: $REPO_NAME"
echo "Location: $REPO_DIR"
echo "Repository size: $(du -sh . | cut -f1)"
if [ "$RESPECT_GITIGNORE" = true ] && is_git_repo; then
  # Count files respecting .gitignore
  FILE_COUNT=$({ git ls-files 2>/dev/null; git ls-files --others --exclude-standard 2>/dev/null; } | sort -u | wc -l)
  echo "Files: $FILE_COUNT (respecting .gitignore)"
elif [ "$RESPECT_GITIGNORE" = true ]; then
  echo "Files: $(find . -type f 2>/dev/null | grep -Ev "/($EXCLUDE_DIRS)/" | wc -l) (excluding common dirs)"
else
  echo "Files: $(find . -type f | wc -l)"
fi

#==============================================================================
# STEP 2: Basic Statistics with tokei
#==============================================================================
print_header "STEP 2: Code Statistics with tokei"

echo "Running tokei..."
tokei . | tee "$RESULTS_DIR/tokei_stats.txt"

#==============================================================================
# STEP 3: Code Statistics with scc
#==============================================================================
print_header "STEP 3: Code Statistics with scc (includes complexity)"

echo "Running scc..."
scc . | tee "$RESULTS_DIR/scc_stats.txt"

print_subheader "Top 10 Most Complex Files"
scc --by-file --sort complexity . 2>/dev/null | head -20 | tee "$RESULTS_DIR/scc_complexity.txt"

#==============================================================================
# STEP 4: Fast Keyword Analysis (Code vs Comments) - Multi-Language
#==============================================================================
print_header "STEP 4: Fast Keyword Analysis (Code vs Comments)"

# Count identical lines read from stdin and print the most frequent ones.
# Uses 'counts' (Rust) if available, falls back to Python Counter.
# Arguments: $1 - how many entries to print (default 50)
fast_count() {
  local top_n="${1:-50}"
  if command -v counts &> /dev/null; then
    # Always drop the first output line (presumably a summary header - the
    # original head/tail arithmetic skipped it only when there were more than
    # top_n lines), then keep the next top_n entries.
    counts 2>/dev/null | tail -n +2 | head -n "$top_n"
  else
    # top_n travels as an argument rather than being spliced into the source.
    python3 -c '
import sys
from collections import Counter
c = Counter(line.rstrip() for line in sys.stdin)
for word, count in c.most_common(int(sys.argv[1])):
    print(f"{count} {word}")
' "$top_n"
  fi
}

#------------------------------------------------------------------------------
# Language Detection and Configuration
#------------------------------------------------------------------------------
print_subheader "Detecting languages in repository..."

if [ "$RESPECT_GITIGNORE" = true ]; then
  if is_git_repo; then
    echo -e "${YELLOW}Note: Respecting .gitignore (excludes node_modules, build outputs, etc.)${NC}"
  else
    echo -e "${YELLOW}Note: Excluding common directories (node_modules, .git, vendor, etc.)${NC}"
  fi
  echo " Use --no-ignore to include everything."
  echo ""
fi

# Count files by extension to detect primary languages (using helper)
declare -A LANG_FILES
LANG_FILES[c]=$(count_files "*.c")
LANG_FILES[cpp]=$(count_files "*.cpp" "*.cc" "*.cxx")
LANG_FILES[h]=$(count_files "*.h" "*.hpp" "*.hxx")
LANG_FILES[python]=$(count_files "*.py")
# *.jsx is included because the JavaScript processing step reads it too;
# counting only *.js made JSX-only repositories look JavaScript-free.
LANG_FILES[javascript]=$(count_files "*.js" "*.jsx")
LANG_FILES[typescript]=$(count_files "*.ts" "*.tsx")
LANG_FILES[java]=$(count_files "*.java")
LANG_FILES[go]=$(count_files "*.go")
LANG_FILES[rust]=$(count_files "*.rs")
LANG_FILES[ruby]=$(count_files "*.rb")
LANG_FILES[shell]=$(count_files "*.sh" "*.bash")

echo "Files found by language:"
for lang in c cpp h python javascript typescript java go rust ruby shell; do
  count=${LANG_FILES[$lang]}
  if [ "$count" -gt 0 ]; then
    echo " $lang: $count files"
  fi
done

# Determine which language families are present
HAS_C_FAMILY=false
HAS_PYTHON=false
HAS_JS_FAMILY=false
HAS_SHELL=false
HAS_RUBY=false
HAS_GO=false
HAS_RUST=false
HAS_JAVA=false

(( ${LANG_FILES[c]} + ${LANG_FILES[cpp]} + ${LANG_FILES[h]} > 0 )) && HAS_C_FAMILY=true
(( ${LANG_FILES[python]} > 0 )) && HAS_PYTHON=true
(( ${LANG_FILES[javascript]} + ${LANG_FILES[typescript]} > 0 )) && HAS_JS_FAMILY=true
(( ${LANG_FILES[shell]} > 0 )) && HAS_SHELL=true
(( ${LANG_FILES[ruby]} > 0 )) && HAS_RUBY=true
(( ${LANG_FILES[go]} > 0 )) && HAS_GO=true
(( ${LANG_FILES[rust]} > 0 )) && HAS_RUST=true
(( ${LANG_FILES[java]} > 0 )) && HAS_JAVA=true

#------------------------------------------------------------------------------
# Language-specific keyword definitions
#------------------------------------------------------------------------------
# C/C++ keywords
KEYWORDS_C="auto|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|goto|if|int|long|register|return|short|signed|sizeof|static|struct|switch|typedef|union|unsigned|void|volatile|while|inline|restrict|_Bool|_Complex|_Imaginary"
# Each table is a "|"-separated alternation that is later wrapped as
# \b(...)\b and matched against comment-stripped source.
# C++ keywords (C keywords plus the C++ additions)
KEYWORDS_CPP="$KEYWORDS_C|alignas|alignof|and|and_eq|asm|atomic_cancel|atomic_commit|atomic_noexcept|bitand|bitor|bool|catch|char16_t|char32_t|char8_t|class|co_await|co_return|co_yield|compl|concept|const_cast|consteval|constexpr|constinit|decltype|delete|dynamic_cast|explicit|export|false|friend|mutable|namespace|new|noexcept|not|not_eq|nullptr|operator|or|or_eq|override|private|protected|public|reflexpr|reinterpret_cast|requires|static_assert|static_cast|synchronized|template|this|thread_local|throw|true|try|typeid|typename|using|virtual|wchar_t|xor|xor_eq"

# Python keywords
KEYWORDS_PYTHON="False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield"

# JavaScript/TypeScript keywords
KEYWORDS_JS="abstract|arguments|await|boolean|break|byte|case|catch|char|class|const|continue|debugger|default|delete|do|double|else|enum|eval|export|extends|false|final|finally|float|for|function|goto|if|implements|import|in|instanceof|int|interface|let|long|native|new|null|package|private|protected|public|return|short|static|super|switch|synchronized|this|throw|throws|transient|true|try|typeof|undefined|var|void|volatile|while|with|yield"
KEYWORDS_TS="$KEYWORDS_JS|any|as|asserts|bigint|declare|get|infer|intrinsic|is|keyof|module|namespace|never|out|override|readonly|require|set|string|symbol|type|unique|unknown"

# Go keywords
KEYWORDS_GO="break|case|chan|const|continue|default|defer|else|fallthrough|for|func|go|goto|if|import|interface|map|package|range|return|select|struct|switch|type|var"

# Rust keywords
KEYWORDS_RUST="as|async|await|break|const|continue|crate|dyn|else|enum|extern|false|fn|for|if|impl|in|let|loop|match|mod|move|mut|pub|ref|return|self|Self|static|struct|super|trait|true|type|unsafe|use|where|while"

# Shell keywords: Bash reserved words plus the flow-control and declaration
# builtins that read like keywords. The per-language keyword step looks this
# table up; without it shell sources were silently skipped.
KEYWORDS_SHELL="if|then|elif|else|fi|case|esac|for|select|while|until|do|done|in|function|time|coproc|break|continue|return|exit|local|declare|typeset|readonly|export|unset|shift|source|eval|exec|trap"

# Java keywords (reserved words and the literals true/false/null). Same
# reason as above: the table was referenced but never defined.
KEYWORDS_JAVA="abstract|assert|boolean|break|byte|case|catch|char|class|const|continue|default|do|double|else|enum|extends|false|final|finally|float|for|goto|if|implements|import|instanceof|int|interface|long|native|new|null|package|private|protected|public|return|short|static|strictfp|super|switch|synchronized|this|throw|throws|transient|true|try|void|volatile|while"

# Ruby keywords
+KEYWORDS_RUBY="BEGIN|END|alias|and|begin|break|case|class|def|defined|do|else|elsif|end|ensure|false|for|if|in|module|next|nil|not|or|redo|rescue|retry|return|self|super|then|true|undef|unless|until|when|while|yield" +#------------------------------------------------------------------------------ +# Multi-language comment processing - KEEP LANGUAGES SEPARATE +#------------------------------------------------------------------------------ +print_subheader "Processing source files (separating code from comments)..." + +# Create per-language output directory +mkdir -p "$RESULTS_DIR/per_language" +COMMENTS_TEMP=$(mktemp) +trap 'rm -f "$COMMENTS_TEMP" /tmp/code_*.tmp 2>/dev/null' EXIT + +declare -A LANG_CODE_FILES + +# Process C/C++ files +if $HAS_C_FAMILY; then + echo "Processing C/C++ files..." + LANG_CODE_FILES[c_cpp]=$(mktemp /tmp/code_c_cpp.XXXXXX.tmp) + find_files "*.c" "*.cpp" "*.cc" "*.cxx" "*.h" "*.hpp" | head -15000 | xargs cat 2>/dev/null > "${LANG_CODE_FILES[c_cpp]}" + + # Extract and strip C-style comments + perl -0777 -ne 'while (/\/\*(.+?)\*\//gs) { print "$1\n"; } while (/\/\/([^\n]*)/g) { print "$1\n"; }' "${LANG_CODE_FILES[c_cpp]}" >> "$COMMENTS_TEMP" + perl -0777 -pe 's|/\*.*?\*/||gs; s|//[^\n]*||g;' "${LANG_CODE_FILES[c_cpp]}" > "${LANG_CODE_FILES[c_cpp]}.clean" + mv "${LANG_CODE_FILES[c_cpp]}.clean" "${LANG_CODE_FILES[c_cpp]}" +fi + +# Process JavaScript files (separate from TypeScript) +if $HAS_JS_FAMILY; then + echo "Processing JavaScript files..." + LANG_CODE_FILES[javascript]=$(mktemp /tmp/code_js.XXXXXX.tmp) + find_files "*.js" "*.jsx" | head -15000 | xargs cat 2>/dev/null > "${LANG_CODE_FILES[javascript]}" + + echo "Processing TypeScript files..." 
+ LANG_CODE_FILES[typescript]=$(mktemp /tmp/code_ts.XXXXXX.tmp) + find_files "*.ts" "*.tsx" | head -15000 | xargs cat 2>/dev/null > "${LANG_CODE_FILES[typescript]}" + + # Extract and strip comments from both + for lang_file in "${LANG_CODE_FILES[javascript]}" "${LANG_CODE_FILES[typescript]}"; do + [ ! -s "$lang_file" ] && continue + perl -0777 -ne 'while (/\/\*(.+?)\*\//gs) { print "$1\n"; } while (/\/\/([^\n]*)/g) { print "$1\n"; }' "$lang_file" >> "$COMMENTS_TEMP" + perl -0777 -pe 's|/\*.*?\*/||gs; s|//[^\n]*||g;' "$lang_file" > "${lang_file}.clean" + mv "${lang_file}.clean" "$lang_file" + done +fi + +# Process Python files +if $HAS_PYTHON; then + echo "Processing Python files..." + LANG_CODE_FILES[python]=$(mktemp /tmp/code_python.XXXXXX.tmp) + find_files "*.py" | head -15000 | xargs cat 2>/dev/null > "${LANG_CODE_FILES[python]}" + + perl -ne 'if (/^\s*#(.*)/) { print "$1\n"; } elsif (/#(.*)$/) { print "$1\n"; }' "${LANG_CODE_FILES[python]}" >> "$COMMENTS_TEMP" + perl -0777 -ne 'while (/"""(.+?)"""/gs) { print "$1\n"; } while (/'"'"''"'"''"'"'(.+?)'"'"''"'"''"'"'/gs) { print "$1\n"; }' "${LANG_CODE_FILES[python]}" >> "$COMMENTS_TEMP" + perl -pe 's/#.*$//' "${LANG_CODE_FILES[python]}" | perl -0777 -pe 's/""".*?"""//gs; s/'"'"''"'"''"'"'.*?'"'"''"'"''"'"'//gs' > "${LANG_CODE_FILES[python]}.clean" + mv "${LANG_CODE_FILES[python]}.clean" "${LANG_CODE_FILES[python]}" +fi + +# Process Go files +if $HAS_GO; then + echo "Processing Go files..." 
+ LANG_CODE_FILES[go]=$(mktemp /tmp/code_go.XXXXXX.tmp) + find_files "*.go" | head -15000 | xargs cat 2>/dev/null > "${LANG_CODE_FILES[go]}" + + perl -0777 -ne 'while (/\/\*(.+?)\*\//gs) { print "$1\n"; } while (/\/\/([^\n]*)/g) { print "$1\n"; }' "${LANG_CODE_FILES[go]}" >> "$COMMENTS_TEMP" + perl -0777 -pe 's|/\*.*?\*/||gs; s|//[^\n]*||g;' "${LANG_CODE_FILES[go]}" > "${LANG_CODE_FILES[go]}.clean" + mv "${LANG_CODE_FILES[go]}.clean" "${LANG_CODE_FILES[go]}" +fi + +# Process Rust files +if $HAS_RUST; then + echo "Processing Rust files..." + LANG_CODE_FILES[rust]=$(mktemp /tmp/code_rust.XXXXXX.tmp) + find_files "*.rs" | head -15000 | xargs cat 2>/dev/null > "${LANG_CODE_FILES[rust]}" + + perl -0777 -ne 'while (/\/\*(.+?)\*\//gs) { print "$1\n"; } while (/\/\/([^\n]*)/g) { print "$1\n"; }' "${LANG_CODE_FILES[rust]}" >> "$COMMENTS_TEMP" + perl -0777 -pe 's|/\*.*?\*/||gs; s|//[^\n]*||g;' "${LANG_CODE_FILES[rust]}" > "${LANG_CODE_FILES[rust]}.clean" + mv "${LANG_CODE_FILES[rust]}.clean" "${LANG_CODE_FILES[rust]}" +fi + +# Process Ruby files +if $HAS_RUBY; then + echo "Processing Ruby files..." + LANG_CODE_FILES[ruby]=$(mktemp /tmp/code_ruby.XXXXXX.tmp) + find_files "*.rb" | head -5000 | xargs cat 2>/dev/null > "${LANG_CODE_FILES[ruby]}" + + perl -ne 'if (/#(.*)$/) { print "$1\n"; }' "${LANG_CODE_FILES[ruby]}" >> "$COMMENTS_TEMP" + perl -0777 -ne 'while (/=begin(.+?)=end/gs) { print "$1\n"; }' "${LANG_CODE_FILES[ruby]}" >> "$COMMENTS_TEMP" + perl -pe 's/#.*$//' "${LANG_CODE_FILES[ruby]}" | perl -0777 -pe 's/=begin.*?=end//gs' > "${LANG_CODE_FILES[ruby]}.clean" + mv "${LANG_CODE_FILES[ruby]}.clean" "${LANG_CODE_FILES[ruby]}" +fi + +# Process Shell files +if $HAS_SHELL; then + echo "Processing Shell files..." 
+ LANG_CODE_FILES[shell]=$(mktemp /tmp/code_shell.XXXXXX.tmp) + find_files "*.sh" "*.bash" | head -5000 | xargs cat 2>/dev/null > "${LANG_CODE_FILES[shell]}" + + perl -ne 'if (/^\s*#(.*)/ && !/^#!/) { print "$1\n"; } elsif (/#(.*)$/) { print "$1\n"; }' "${LANG_CODE_FILES[shell]}" >> "$COMMENTS_TEMP" + perl -pe 's/#.*$//' "${LANG_CODE_FILES[shell]}" > "${LANG_CODE_FILES[shell]}.clean" + mv "${LANG_CODE_FILES[shell]}.clean" "${LANG_CODE_FILES[shell]}" +fi + +# Process Java files +if $HAS_JAVA; then + echo "Processing Java files..." + LANG_CODE_FILES[java]=$(mktemp /tmp/code_java.XXXXXX.tmp) + find_files "*.java" | head -15000 | xargs cat 2>/dev/null > "${LANG_CODE_FILES[java]}" + + perl -0777 -ne 'while (/\/\*(.+?)\*\//gs) { print "$1\n"; } while (/\/\/([^\n]*)/g) { print "$1\n"; }' "${LANG_CODE_FILES[java]}" >> "$COMMENTS_TEMP" + perl -0777 -pe 's|/\*.*?\*/||gs; s|//[^\n]*||g;' "${LANG_CODE_FILES[java]}" > "${LANG_CODE_FILES[java]}.clean" + mv "${LANG_CODE_FILES[java]}.clean" "${LANG_CODE_FILES[java]}" +fi + +COMMENT_LINES=$(wc -l < "$COMMENTS_TEMP") +echo "" +echo "Processed languages: ${!LANG_CODE_FILES[*]}" +echo "Total comment lines: $COMMENT_LINES" + +#------------------------------------------------------------------------------ +# Per-Language Keyword Analysis - Each language gets its own file +#------------------------------------------------------------------------------ +print_subheader "Per-Language Keyword Analysis" + +# Map language names to keyword variables +declare -A LANG_KEYWORDS +LANG_KEYWORDS[c_cpp]="$KEYWORDS_CPP" +LANG_KEYWORDS[python]="$KEYWORDS_PYTHON" +LANG_KEYWORDS[javascript]="$KEYWORDS_JS" +LANG_KEYWORDS[typescript]="$KEYWORDS_TS" +LANG_KEYWORDS[go]="$KEYWORDS_GO" +LANG_KEYWORDS[rust]="$KEYWORDS_RUST" +LANG_KEYWORDS[ruby]="$KEYWORDS_RUBY" +LANG_KEYWORDS[shell]="$KEYWORDS_SHELL" +LANG_KEYWORDS[java]="$KEYWORDS_JAVA" + +# Analyze each language separately +for lang in "${!LANG_CODE_FILES[@]}"; do + code_file="${LANG_CODE_FILES[$lang]}" + 
keywords="${LANG_KEYWORDS[$lang]}" + output_file="$RESULTS_DIR/per_language/keywords_${lang}.txt" + + if [ -f "$code_file" ] && [ -s "$code_file" ] && [ -n "$keywords" ]; then + echo "" + echo -e "${YELLOW}=== $lang Keywords ===${NC}" + ugrep -o "\b($keywords)\b" "$code_file" 2>/dev/null \ + | fast_count 50 \ + | tee "$output_file" + fi +done + +#------------------------------------------------------------------------------ +# Per-Language Function Analysis +#------------------------------------------------------------------------------ +print_subheader "Per-Language Function Calls" + +for lang in "${!LANG_CODE_FILES[@]}"; do + code_file="${LANG_CODE_FILES[$lang]}" + output_file="$RESULTS_DIR/per_language/functions_${lang}.txt" + + if [ -f "$code_file" ] && [ -s "$code_file" ]; then + echo "" + echo -e "${YELLOW}=== $lang Functions ===${NC}" + ugrep -o '\b[a-zA-Z_][a-zA-Z0-9_]*\s*\(' "$code_file" 2>/dev/null \ + | sed 's/\s*(//' \ + | grep -vE '^(if|for|while|switch|catch|elif)$' \ + | fast_count 30 \ + | tee "$output_file" + fi +done + +#------------------------------------------------------------------------------ +# Per-Language Import Analysis +#------------------------------------------------------------------------------ +print_subheader "Per-Language Imports/Includes" + +# C/C++ includes +if [ -n "${LANG_CODE_FILES[c_cpp]}" ] && [ -s "${LANG_CODE_FILES[c_cpp]}" ]; then + echo -e "${YELLOW}=== C/C++ Includes ===${NC}" + ugrep -o '#include\s*[<"][^>"]+[>"]' "${LANG_CODE_FILES[c_cpp]}" 2>/dev/null \ + | fast_count 30 \ + | tee "$RESULTS_DIR/per_language/imports_c_cpp.txt" +fi + +# Python imports +if [ -n "${LANG_CODE_FILES[python]}" ] && [ -s "${LANG_CODE_FILES[python]}" ]; then + echo "" + echo -e "${YELLOW}=== Python Imports ===${NC}" + ugrep -o '^\s*(from\s+\S+\s+import\s+\S+|import\s+\S+)' "${LANG_CODE_FILES[python]}" 2>/dev/null \ + | sed 's/^\s*//' \ + | fast_count 30 \ + | tee "$RESULTS_DIR/per_language/imports_python.txt" +fi + +# JavaScript imports +if 
[ -n "${LANG_CODE_FILES[javascript]}" ] && [ -s "${LANG_CODE_FILES[javascript]}" ]; then + echo "" + echo -e "${YELLOW}=== JavaScript Imports ===${NC}" + ugrep -o "(import\s+.*\s+from\s+['\"][^'\"]+['\"]|require\s*\(['\"][^'\"]+['\"]\))" "${LANG_CODE_FILES[javascript]}" 2>/dev/null \ + | fast_count 30 \ + | tee "$RESULTS_DIR/per_language/imports_javascript.txt" +fi + +# TypeScript imports +if [ -n "${LANG_CODE_FILES[typescript]}" ] && [ -s "${LANG_CODE_FILES[typescript]}" ]; then + echo "" + echo -e "${YELLOW}=== TypeScript Imports ===${NC}" + ugrep -o "(import\s+.*\s+from\s+['\"][^'\"]+['\"]|require\s*\(['\"][^'\"]+['\"]\))" "${LANG_CODE_FILES[typescript]}" 2>/dev/null \ + | fast_count 30 \ + | tee "$RESULTS_DIR/per_language/imports_typescript.txt" +fi + +# Go imports +if [ -n "${LANG_CODE_FILES[go]}" ] && [ -s "${LANG_CODE_FILES[go]}" ]; then + echo "" + echo -e "${YELLOW}=== Go Imports ===${NC}" + ugrep -o '"[^"]+/[^"]+"' "${LANG_CODE_FILES[go]}" 2>/dev/null \ + | fast_count 30 \ + | tee "$RESULTS_DIR/per_language/imports_go.txt" +fi + +# Rust use statements +if [ -n "${LANG_CODE_FILES[rust]}" ] && [ -s "${LANG_CODE_FILES[rust]}" ]; then + echo "" + echo -e "${YELLOW}=== Rust Use Statements ===${NC}" + ugrep -o '^\s*use\s+[^;]+' "${LANG_CODE_FILES[rust]}" 2>/dev/null \ + | sed 's/^\s*//' \ + | fast_count 30 \ + | tee "$RESULTS_DIR/per_language/imports_rust.txt" +fi + +# Java imports +if [ -n "${LANG_CODE_FILES[java]}" ] && [ -s "${LANG_CODE_FILES[java]}" ]; then + echo "" + echo -e "${YELLOW}=== Java Imports ===${NC}" + ugrep -o '^\s*import\s+[^;]+' "${LANG_CODE_FILES[java]}" 2>/dev/null \ + | sed 's/^\s*//' \ + | fast_count 30 \ + | tee "$RESULTS_DIR/per_language/imports_java.txt" +fi + +# Ruby requires +if [ -n "${LANG_CODE_FILES[ruby]}" ] && [ -s "${LANG_CODE_FILES[ruby]}" ]; then + echo "" + echo -e "${YELLOW}=== Ruby Requires ===${NC}" + ugrep -o "(require\s+['\"][^'\"]+['\"]|require_relative\s+['\"][^'\"]+['\"])" "${LANG_CODE_FILES[ruby]}" 2>/dev/null \ + 
| fast_count 30 \ + | tee "$RESULTS_DIR/per_language/imports_ruby.txt" +fi + +# Shell sources +if [ -n "${LANG_CODE_FILES[shell]}" ] && [ -s "${LANG_CODE_FILES[shell]}" ]; then + echo "" + echo -e "${YELLOW}=== Shell Sources ===${NC}" + ugrep -o '(source\s+[^\s]+|\.\s+[^\s]+)' "${LANG_CODE_FILES[shell]}" 2>/dev/null \ + | fast_count 30 \ + | tee "$RESULTS_DIR/per_language/imports_shell.txt" +fi + +#------------------------------------------------------------------------------ +# Combined Analysis (for overview/backward compatibility) +#------------------------------------------------------------------------------ +print_subheader "Combined Code Identifiers (all languages)" + +# Create combined CODE_TEMP +CODE_TEMP=$(mktemp) +for lang_file in "${LANG_CODE_FILES[@]}"; do + [ -f "$lang_file" ] && cat "$lang_file" >> "$CODE_TEMP" +done + +ugrep -o '\b[a-zA-Z_][a-zA-Z0-9_]*\b' "$CODE_TEMP" 2>/dev/null \ + | fast_count $TOP_N \ + | tee "$RESULTS_DIR/code_identifiers.txt" + +print_subheader "Most Used Words in COMMENTS" +ugrep -o '\b[a-zA-Z_][a-zA-Z0-9_]*\b' "$COMMENTS_TEMP" 2>/dev/null \ + | fast_count $TOP_N \ + | tee "$RESULTS_DIR/comment_words.txt" + +# Create combined files from per-language analysis (for backward compatibility) +{ + echo "# Combined keywords from all languages" + echo "# Format: count keyword (from per_language/keywords_*.txt)" + cat "$RESULTS_DIR/per_language"/keywords_*.txt 2>/dev/null | grep -v '^$' | sort -t' ' -k1 -nr | head -100 +} > "$RESULTS_DIR/grep_keywords.txt" + +{ + echo "# Combined functions from all languages" + echo "# See per_language/functions_*.txt for language-specific breakdown" + cat "$RESULTS_DIR/per_language"/functions_*.txt 2>/dev/null | grep -v '^$' | sort -t' ' -k1 -nr | head -100 +} > "$RESULTS_DIR/grep_function_calls.txt" + +{ + echo "# Combined imports from all languages" + echo "# See per_language/imports_*.txt for language-specific breakdown" + cat "$RESULTS_DIR/per_language"/imports_*.txt 2>/dev/null | grep -v '^$' | 
sort -t' ' -k1 -nr | head -100 +} > "$RESULTS_DIR/grep_imports.txt" + +# List what per-language files were created +echo "" +echo "Per-language analysis files created:" +ls -la "$RESULTS_DIR/per_language/" 2>/dev/null | grep -v '^total' | awk '{print " " $NF}' + + +print_subheader "Generating tags (this may take a while)..." + +# Generate tags for different kinds +ctags -R --languages=C,C++ --c-kinds=+fp --fields=+lK -f "$RESULTS_DIR/tags" . 2>/dev/null || true + +if [ -f "$RESULTS_DIR/tags" ]; then + TOTAL_TAGS=$(grep -ac '^[^!]' "$RESULTS_DIR/tags" 2>/dev/null || echo "0") + echo "Total symbols found: $TOTAL_TAGS" + + print_subheader "Most Common Symbol Names" + # Fast: use cut + counts instead of awk + sort | uniq + # -a flag treats tags file as text (may contain binary-like patterns) + grep -a '^[^!]' "$RESULTS_DIR/tags" | cut -f1 | fast_count $TOP_N \ + | tee "$RESULTS_DIR/ctags_symbols.txt" + + print_subheader "Symbol Types Distribution" + # Fast: extract single-letter kind code after ;" and count + grep -aoP ';"\t\K[a-z]' "$RESULTS_DIR/tags" 2>/dev/null | fast_count 20 | while read count kind; do + case $kind in + f) echo "$count functions" ;; + v) echo "$count variables" ;; + s) echo "$count structs" ;; + t) echo "$count typedefs" ;; + e) echo "$count enum values" ;; + g) echo "$count enums" ;; + m) echo "$count struct/union members" ;; + d) echo "$count macro definitions" ;; + p) echo "$count function prototypes" ;; + u) echo "$count unions" ;; + c) echo "$count classes" ;; + n) echo "$count namespaces" ;; + *) echo "$count kind=$kind" ;; + esac + done | tee "$RESULTS_DIR/ctags_kinds.txt" +fi + +#============================================================================== +# STEP 6: cscope Analysis +#============================================================================== +print_header "STEP 6: cscope Database Analysis" + +print_subheader "Building cscope database..." 
+ +# Find all C source files (respecting .gitignore if available) +if [ "$RESPECT_GITIGNORE" = true ] && is_git_repo; then + { git ls-files -- '*.c' '*.h' 2>/dev/null; git ls-files --others --exclude-standard -- '*.c' '*.h' 2>/dev/null; } | sort -u > "$RESULTS_DIR/cscope.files" +elif [ "$RESPECT_GITIGNORE" = true ]; then + find . \( -name "*.c" -o -name "*.h" \) -type f 2>/dev/null | grep -Ev "/($EXCLUDE_DIRS)/" > "$RESULTS_DIR/cscope.files" +else + find . \( -name "*.c" -o -name "*.h" \) -type f > "$RESULTS_DIR/cscope.files" 2>/dev/null +fi +FILE_COUNT=$(wc -l < "$RESULTS_DIR/cscope.files") +echo "Found $FILE_COUNT source files" + +# Build cscope database (can take a while for large repos) +echo "Building database (this may take several minutes for Linux kernel)..." +cscope -b -q -i "$RESULTS_DIR/cscope.files" -f "$RESULTS_DIR/cscope.out" 2>/dev/null || true + +if [ -f "$RESULTS_DIR/cscope.out" ]; then + echo "Database built successfully" + echo "Database size: $(du -sh "$RESULTS_DIR/cscope.out" | cut -f1)" + + print_subheader "Example: Finding callers of 'printk' function" + cscope -d -f "$RESULTS_DIR/cscope.out" -L -3 printk 2>/dev/null | head -20 || echo "No results" + + print_subheader "Example: Finding definition of 'struct file'" + cscope -d -f "$RESULTS_DIR/cscope.out" -L -1 "struct file" 2>/dev/null | head -10 || echo "No results" +fi + +#============================================================================== +# STEP 7: clang AST Analysis (if available) +#============================================================================== +print_header "STEP 7: clang-based Analysis (AST-level)" + +print_subheader "Analyzing a sample file with clang AST dump" + +# Find a simple C file to analyze (respecting .gitignore) +if [ "$RESPECT_GITIGNORE" = true ] && is_git_repo; then + SAMPLE_FILE=$(git ls-files -- '*.c' 2>/dev/null | head -20 | while read -r f; do + [ -f "$f" ] && [ "$(stat -c%s "$f" 2>/dev/null || echo 999999)" -lt 51200 ] && echo "$f" + done | 
head -1) +elif [ "$RESPECT_GITIGNORE" = true ]; then + SAMPLE_FILE=$(find . -name "*.c" -size -50k -type f 2>/dev/null | grep -Ev "/($EXCLUDE_DIRS)/" | head -1) +else + SAMPLE_FILE=$(find . -name "*.c" -size -50k 2>/dev/null | head -1) +fi + +if [ -n "$SAMPLE_FILE" ]; then + echo "Sample file: $SAMPLE_FILE" + echo "" + echo "Function declarations in this file:" + clang -Xclang -ast-dump -fsyntax-only "$SAMPLE_FILE" 2>/dev/null \ + | grep -E "FunctionDecl.*<.*>" \ + | head -20 \ + | sed 's/.*FunctionDecl.*<[^>]*> / /' \ + | tee "$RESULTS_DIR/clang_sample_functions.txt" || echo "Analysis failed (missing headers)" +fi + +print_subheader "Note: Full clang analysis requires compile_commands.json" +echo "For proper AST analysis of the Linux kernel, you need to:" +echo " 1. Configure the kernel: make defconfig" +echo " 2. Generate compile_commands.json: make compile_commands.json" +echo " 3. Use clang-query or clang-check with the database" + +#============================================================================== +# STEP 8: Summary +#============================================================================== +print_header "ANALYSIS COMPLETE" + +echo "Results saved to: $RESULTS_DIR/" +echo "" +ls -la "$RESULTS_DIR/" + +echo "" +echo -e "${GREEN}Quick Summary:${NC}" +echo "" + +if [ -f "$RESULTS_DIR/grep_keywords.txt" ]; then + echo "Top 5 Language Keywords (in code):" + head -5 "$RESULTS_DIR/grep_keywords.txt" | awk '{printf " %s: %s times\n", $2, $1}' +fi + +echo "" +if [ -f "$RESULTS_DIR/grep_function_calls.txt" ]; then + echo "Top 5 Function/Method Calls (in code):" + head -5 "$RESULTS_DIR/grep_function_calls.txt" | awk '{printf " %s(): %s times\n", $2, $1}' +fi + +echo "" +if [ -f "$RESULTS_DIR/comment_words.txt" ]; then + echo "Top 5 Words in Comments:" + head -5 "$RESULTS_DIR/comment_words.txt" | awk '{printf " %s: %s times\n", $2, $1}' +fi + +echo "" +if [ -f "$RESULTS_DIR/grep_imports.txt" ]; then + echo "Top 5 Imports/Includes:" + head -5 
"$RESULTS_DIR/grep_imports.txt" | awk '{count=$1; $1=""; printf " %s: %s times\n", substr($0,2), count}' +fi + +echo "" +echo -e "${BLUE}To explore interactively with cscope (C/C++ only):${NC}" +echo " cd $REPO_DIR && cscope -d -f $RESULTS_DIR/cscope.out" +echo "" +echo -e "${BLUE}To browse tags in vim:${NC}" +echo " cd $REPO_DIR && vim -t main" diff --git a/scripts/utils/generate_study_materials.sh b/scripts/utils/generate_study_materials.sh new file mode 100755 index 0000000..77dce3b --- /dev/null +++ b/scripts/utils/generate_study_materials.sh @@ -0,0 +1,987 @@ +#!/bin/bash +# Generate study materials (documentation links + Anki cards) from repo analysis +# Usage: ./generate_study_materials.sh [--top N] [--languages "python,c,js"] +# +# Examples: +# ./generate_study_materials.sh /tmp/repo_analysis/results_myproject +# ./generate_study_materials.sh /tmp/repo_analysis/results_linux --top 20 --languages "c" +# ./generate_study_materials.sh ./results --languages "python,typescript" + +set -e + +#============================================================================== +# Configuration +#============================================================================== +RESULTS_DIR="${1:-.}" +TOP_N=30 +LANGUAGES="auto" # Will detect from results + +# Parse arguments +shift || true +while [[ $# -gt 0 ]]; do + case "$1" in + --top) + TOP_N="$2" + shift 2 + ;; + --languages) + LANGUAGES="$2" + shift 2 + ;; + *) + shift + ;; + esac +done + +# Output files +DOCS_FILE="$RESULTS_DIR/documentation_links.md" +ANKI_FILE="$RESULTS_DIR/anki_cards.txt" +LLM_PROMPT_FILE="$RESULTS_DIR/llm_anki_prompt.md" + +# Offline documentation setup +OFFLINE_DOCS_DIR="${OFFLINE_DOCS_DIR:-$HOME/.local/share/offline-docs}" +LOOKUP_SCRIPT="$(dirname "$0")/lookup_docs.sh" +USE_OFFLINE_DOCS=false + +# Check if offline docs are available +if [ -d "$OFFLINE_DOCS_DIR" ] && [ -x "$LOOKUP_SCRIPT" ]; then + USE_OFFLINE_DOCS=true +fi + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +BLUE='\033[0;34m' 
YELLOW='\033[1;33m'
NC='\033[0m'

#==============================================================================
# Offline Documentation Lookup (preferred if available)
#==============================================================================

# Resolve a term (or a whole import line) to a local documentation file by
# delegating to the helper named in $LOOKUP_SCRIPT.
# Globals:   USE_OFFLINE_DOCS (read), LOOKUP_SCRIPT (read)
# Arguments: $1 - term to look up
#            $2 - language identifier passed through to the helper
#            $3 - optional full import line; when non-empty the helper is
#                 invoked with --import instead of the bare term
# Outputs:   the file path (text before the first '|') on stdout
# Returns:   0 when a path was printed, 1 otherwise
lookup_offline() {
    local term="${1:-}"
    local lang="${2:-}"
    local import_line="${3:-}" # Optional: full import line for context

    # Compare the flag as a string instead of executing its value as a command;
    # an unset or unexpected value is treated as "offline docs disabled".
    if [[ "${USE_OFFLINE_DOCS:-false}" != true ]]; then
        return 1
    fi

    local result
    if [[ -n "$import_line" ]]; then
        # Use import-aware lookup - get the line with the file path
        result=$("$LOOKUP_SCRIPT" --import "$import_line" "$lang" 2>/dev/null | grep "^/" | head -1) || true
    else
        result=$("$LOOKUP_SCRIPT" "$term" "$lang" 2>/dev/null | grep "^File:" | head -1 | sed 's/^File: //') || true
    fi

    # Extract file path (before the | separator)
    local file_path="${result%%|*}"
    if [[ -n "$file_path" ]]; then
        printf '%s\n' "$file_path"
        return 0
    fi

    return 1
}

#==============================================================================
# Documentation URL Generators (online fallback)
#==============================================================================

# Python documentation
# Arguments: $1 - keyword, built-in or module name
#            ($2 was accepted by the original as a "type" hint but never used;
#            extra arguments are still tolerated and ignored)
# Outputs:   one documentation URL on stdout
python_doc_url() {
    local term="$1"
    local ref="https://docs.python.org/3/reference"
    local lib="https://docs.python.org/3/library"

    case "$term" in
        # Keywords introducing compound statements
        if|else|elif|for|while|try|except|finally|with|as|def|class|async)
            echo "$ref/compound_stmts.html"
            ;;
        # Keywords that form simple statements
        import|from|return|yield|raise|pass|break|continue|global|nonlocal|assert|del)
            echo "$ref/simple_stmts.html"
            ;;
        # Operator / expression keywords
        and|or|not|in|is|lambda|await)
            echo "$ref/expressions.html"
            ;;
        # Built-in constants
        True|False|None)
            echo "$lib/constants.html"
            ;;
        # Built-in classes whose entry on the functions page uses a func- anchor
        range|str|list|dict|set|tuple)
            echo "$lib/functions.html#func-$term"
            ;;
        # Built-in functions
        print|len|type|int|float|bool|open|input|format|sorted|reversed|enumerate|zip|map|filter|any|all|sum|min|max|abs|round|isinstance|issubclass|hasattr|getattr|setattr|delattr|callable|iter|next|super|property|staticmethod|classmethod|vars|dir|help|id|hash|repr|ascii|bin|hex|oct|chr|ord|eval|exec|compile)
            echo "$lib/functions.html#$term"
            ;;
        # Common standard-library modules
        os|sys|re|json|datetime|collections|itertools|functools|pathlib|subprocess|threading|multiprocessing|asyncio|typing|dataclasses|unittest|logging|argparse|configparser)
            echo "$lib/$term.html"
            ;;
        # pytest is a third-party package, not part of the standard library
        pytest)
            echo "https://docs.pytest.org/en/stable/"
            ;;
        # Testing
        MagicMock|Mock|patch|PropertyMock)
            echo "$lib/unittest.mock.html"
            ;;
        *)
            echo "https://docs.python.org/3/search.html?q=$term"
            ;;
    esac
}

# JavaScript/TypeScript documentation (MDN)
# Arguments: $1 - keyword, global object, method or Web API name
# Outputs:   one documentation URL on stdout
# Note: arms are tried in order, so a name shared by several groups (for
# example `includes`, which exists on both Array and String) resolves to the
# first group that lists it.
js_doc_url() {
    local term="$1"
    local mdn="https://developer.mozilla.org/en-US/docs/Web"

    case "$term" in
        # Keywords & statements
        if|else|for|while|do|switch|case|break|continue|return|throw|try|catch|finally|function|class|const|let|var|new|this|super|import|export|default|async|await|yield|typeof|instanceof|in|of|delete|void)
            echo "$mdn/JavaScript/Reference/Statements"
            ;;
        # Global objects
        Array|Object|String|Number|Boolean|Symbol|Map|Set|WeakMap|WeakSet|Date|RegExp|Error|Promise|Proxy|Reflect|JSON|Math|Intl)
            echo "$mdn/JavaScript/Reference/Global_Objects/$term"
            ;;
        # Array methods
        map|filter|reduce|forEach|find|findIndex|some|every|includes|indexOf|slice|splice|concat|join|push|pop|shift|unshift|sort|reverse|flat|flatMap)
            echo "$mdn/JavaScript/Reference/Global_Objects/Array/$term"
            ;;
        # String methods
        split|replace|match|search|substring|substr|toLowerCase|toUpperCase|trim|padStart|padEnd|startsWith|endsWith|charAt|charCodeAt)
            echo "$mdn/JavaScript/Reference/Global_Objects/String/$term"
            ;;
        # Promise methods
        then|resolve|reject|all|race|allSettled|any)
            echo "$mdn/JavaScript/Reference/Global_Objects/Promise/$term"
            ;;
        # Common Web APIs
        fetch|console|document|window|localStorage|sessionStorage|setTimeout|setInterval|addEventListener|querySelector|querySelectorAll)
            echo "$mdn/API"
            ;;
        *)
            echo "https://developer.mozilla.org/en-US/search?q=$term"
            ;;
    esac
}

# TypeScript-specific documentation
# Arguments: $1 - TypeScript keyword or utility type
# Outputs:   one documentation URL on stdout; anything not TypeScript-specific
#            is delegated to js_doc_url
ts_doc_url() {
    local term="$1"

    case "$term" in
        interface|type|enum|namespace|declare|readonly|abstract|implements|extends|keyof|typeof|infer|as|is|asserts|satisfies|override)
            echo "https://www.typescriptlang.org/docs/handbook/2/everyday-types.html"
            ;;
        Partial|Required|Readonly|Record|Pick|Omit|Exclude|Extract|NonNullable|ReturnType|Parameters|InstanceType|Awaited)
            echo "https://www.typescriptlang.org/docs/handbook/utility-types.html"
            ;;
        *)
            # Fall back to JS docs for runtime features
            js_doc_url "$term"
            ;;
    esac
}

# C documentation
# Arguments: $1 - keyword, header base name (without .h) or function name
# Outputs:   one cppreference URL on stdout
c_doc_url() {
    local term="$1"
    local cref="https://en.cppreference.com/w/c"

    case "$term" in
        # Keywords
        if|else|for|while|do|switch|case|break|continue|return|goto|sizeof|typedef|struct|union|enum|const|static|extern|register|volatile|inline|restrict|_Bool|_Complex|_Imaginary|_Alignas|_Alignof|_Atomic|_Generic|_Noreturn|_Static_assert|_Thread_local)
            echo "$cref/keyword/$term"
            ;;
        # Standard library headers
        stdio|stdlib|string|math|time|ctype|stdint|stdbool|stddef|limits|float|errno|assert|signal|setjmp|stdarg|locale)
            echo "$cref/header/${term}.h"
            ;;
        # Common functions
        printf|fprintf|sprintf|snprintf|scanf|fscanf|sscanf|fopen|fclose|fread|fwrite|fgets|fputs|fseek|ftell|rewind|fflush)
            echo "$cref/io"
            ;;
        malloc|calloc|realloc|free|memcpy|memmove|memset|memcmp)
            echo "$cref/memory"
            ;;
        strlen|strcpy|strncpy|strcat|strncat|strcmp|strncmp|strchr|strrchr|strstr|strtok)
            echo "$cref/string/byte"
            ;;
        *)
            echo "https://en.cppreference.com/mwiki/index.php?search=$term"
            ;;
    esac
}

#
# C++ documentation
# Arguments: $1 - keyword, container, algorithm or class name
# Outputs:   one cppreference URL on stdout; unknown terms are delegated to
#            c_doc_url (defined earlier in this script)
cpp_doc_url() {
    local term="$1"
    local cref="https://en.cppreference.com/w/cpp"

    case "$term" in
        # C++ specific keywords
        class|public|private|protected|virtual|override|final|explicit|mutable|constexpr|consteval|constinit|concept|requires|co_await|co_yield|co_return|nullptr|noexcept|decltype|auto|template|typename|namespace|using|new|delete|throw|try|catch|static_cast|dynamic_cast|const_cast|reinterpret_cast)
            echo "$cref/keyword/$term"
            ;;
        # STL containers
        vector|list|deque|array|forward_list|set|map|unordered_set|unordered_map|multiset|multimap|stack|queue|priority_queue)
            echo "$cref/container/$term"
            ;;
        # STL algorithms
        sort|find|copy|move|transform|accumulate|count|remove|unique|reverse|rotate|shuffle|partition|merge|binary_search|lower_bound|upper_bound)
            echo "$cref/algorithm/$term"
            ;;
        # Smart pointers
        unique_ptr|shared_ptr|weak_ptr|make_unique|make_shared)
            echo "$cref/memory/$term"
            ;;
        # Common classes
        string|string_view|optional|variant|any|tuple|pair|function|bind|thread|mutex|future|promise|chrono)
            echo "$cref/utility"
            ;;
        *)
            # Try C docs as fallback
            c_doc_url "$term"
            ;;
    esac
}

# Rust documentation
# Arguments: $1 - keyword, std type, std trait or std macro name
# Outputs:   one doc.rust-lang.org URL on stdout
# rustdoc pages live under their defining module and carry an item-kind prefix
# (struct./enum./trait./macro.), so each known item is mapped to its module
# explicitly rather than being appended to the std root.
rust_doc_url() {
    local term="$1"
    local std="https://doc.rust-lang.org/std"

    case "$term" in
        # Keywords
        fn|let|mut|const|static|if|else|match|loop|while|for|in|break|continue|return|struct|enum|impl|trait|type|where|pub|mod|use|crate|self|super|async|await|move|ref|dyn|unsafe|extern)
            echo "$std/keyword.$term.html"
            ;;
        # Common types
        Option)
            echo "$std/option/enum.Option.html"
            ;;
        Result)
            echo "$std/result/enum.Result.html"
            ;;
        Vec)
            echo "$std/vec/struct.Vec.html"
            ;;
        String)
            echo "$std/string/struct.String.html"
            ;;
        Box)
            echo "$std/boxed/struct.Box.html"
            ;;
        Rc)
            echo "$std/rc/struct.Rc.html"
            ;;
        Arc|Mutex|RwLock)
            echo "$std/sync/struct.$term.html"
            ;;
        Cell|RefCell)
            echo "$std/cell/struct.$term.html"
            ;;
        HashMap|HashSet|BTreeMap|BTreeSet)
            echo "$std/collections/struct.$term.html"
            ;;
        # Traits
        Clone)
            echo "$std/clone/trait.Clone.html"
            ;;
        Copy|Send|Sync)
            echo "$std/marker/trait.$term.html"
            ;;
        Debug|Display)
            echo "$std/fmt/trait.$term.html"
            ;;
        Default)
            echo "$std/default/trait.Default.html"
            ;;
        Eq|PartialEq|Ord|PartialOrd)
            echo "$std/cmp/trait.$term.html"
            ;;
        Hash)
            echo "$std/hash/trait.Hash.html"
            ;;
        From|Into|AsRef|AsMut)
            echo "$std/convert/trait.$term.html"
            ;;
        Deref|DerefMut)
            echo "$std/ops/trait.$term.html"
            ;;
        Iterator|IntoIterator)
            echo "$std/iter/trait.$term.html"
            ;;
        # Macros
        println|print|format|vec|panic|assert|assert_eq|assert_ne|debug_assert|todo|unimplemented|unreachable)
            echo "$std/macro.$term.html"
            ;;
        *)
            echo "$std/?search=$term"
            ;;
    esac
}

# Go documentation
# Arguments: $1 - keyword, built-in function or package base name
# Outputs:   one go.dev / pkg.go.dev URL on stdout
go_doc_url() {
    local term="$1"

    case "$term" in
        # Keywords
        func|var|const|type|struct|interface|map|chan|go|select|defer|if|else|for|range|switch|case|default|break|continue|return|goto|fallthrough|package|import)
            echo "https://go.dev/ref/spec"
            ;;
        # Built-in functions
        make|new|len|cap|append|copy|delete|close|panic|recover|print|println|complex|real|imag)
            echo "https://pkg.go.dev/builtin#$term"
            ;;
        # Packages that live below a parent directory in the standard library
        http)
            echo "https://pkg.go.dev/net/http"
            ;;
        json)
            echo "https://pkg.go.dev/encoding/json"
            ;;
        filepath)
            echo "https://pkg.go.dev/path/filepath"
            ;;
        # Common top-level packages
        fmt|os|io|net|time|strings|strconv|errors|context|sync|testing|reflect|regexp|sort|math|crypto|encoding|bufio|bytes|path)
            echo "https://pkg.go.dev/$term"
            ;;
        *)
            echo "https://pkg.go.dev/search?q=$term"
            ;;
    esac
}

# Ruby documentation
# Arguments: $1 - keyword, core class or Enumerable method name
# Outputs:   one ruby-doc.org URL on stdout
ruby_doc_url() {
    local term="$1"

    case "$term" in
        # Keywords
        if|else|elsif|unless|case|when|while|until|for|do|end|begin|rescue|ensure|raise|return|break|next|redo|retry|yield|def|class|module|self|super|nil|true|false|and|or|not|in|then|alias|defined|__FILE__|__LINE__|__ENCODING__)
            echo "https://ruby-doc.org/docs/keywords/1.9/"
            ;;
        # Core classes
        String|Array|Hash|Integer|Float|Symbol|Range|Regexp|Time|Date|File|Dir|IO|Proc|Lambda|Method|Thread|Mutex|Fiber)
            echo "https://ruby-doc.org/core/classes/$term.html"
            ;;
        # Enumerable methods
        each|map|select|reject|find|reduce|inject|collect|detect|sort|sort_by|group_by|partition|any|all|none|one|count|first|last|take|drop)
            echo "https://ruby-doc.org/core/Enumerable.html"
            ;;
        *)
            echo "https://ruby-doc.org/search.html?q=$term"
            ;;
    esac
}

# Java documentation
# Arguments: $1 - keyword or common JDK class name
# Outputs:   one docs.oracle.com URL on stdout
# Class pages are addressed by package, so java.lang, java.util and
# java.util.stream members are routed separately.
java_doc_url() {
    local term="$1"
    local api="https://docs.oracle.com/en/java/javase/17/docs/api"

    case "$term" in
        # Keywords
        if|else|for|while|do|switch|case|break|continue|return|throw|try|catch|finally|class|interface|enum|extends|implements|new|this|super|static|final|abstract|public|private|protected|void|null|true|false|instanceof|synchronized|volatile|transient|native|strictfp|assert|default|package|import)
            echo "https://docs.oracle.com/javase/tutorial/java/nutsandbolts/"
            ;;
        # java.lang classes
        String|Integer|Long|Double|Float|Boolean|Character|Object|Class|System|Math)
            echo "$api/java.base/java/lang/$term.html"
            ;;
        # java.util classes
        Arrays|Collections|List|ArrayList|LinkedList|Map|HashMap|TreeMap|Set|HashSet|TreeSet|Queue|Stack|Optional)
            echo "$api/java.base/java/util/$term.html"
            ;;
        # java.util.stream classes
        Stream)
            echo "$api/java.base/java/util/stream/$term.html"
            ;;
        *)
            echo "$api/search.html?q=$term"
            ;;
    esac
}

# Shell documentation
# Arguments: $1 - reserved word, builtin or external command name
# Outputs:   one Bash manual or man7.org URL on stdout
shell_doc_url() {
    local term="$1"
    local manual="https://www.gnu.org/software/bash/manual/bash.html"

    case "$term" in
        # Reserved words, routed to the manual section that describes them
        if|then|else|elif|fi|case|esac|in|select)
            echo "$manual#Conditional-Constructs"
            ;;
        for|while|until|do|done)
            echo "$manual#Looping-Constructs"
            ;;
        function)
            echo "$manual#Shell-Functions"
            ;;
        time)
            echo "$manual#Pipelines"
            ;;
        coproc)
            echo "$manual#Coprocesses"
            ;;
        # Built-in commands
        echo|printf|read|declare|local|export|unset|set|shopt|alias|source|eval|exec|exit|return|break|continue|shift|trap|wait|kill|jobs|bg|fg|disown|suspend|logout|cd|pwd|pushd|popd|dirs|type|which|command|builtin|enable|help|hash|bind|complete|compgen|compopt)
            echo "$manual#Shell-Builtin-Commands"
            ;;
        # Common external commands (`kill` is already matched by the builtin
        # arm above, so it is not repeated here)
        grep|sed|awk|find|xargs|sort|uniq|cut|tr|head|tail|wc|cat|tee|diff|patch|tar|gzip|zip|curl|wget|ssh|scp|rsync|git|make|chmod|chown|chgrp|ln|cp|mv|rm|mkdir|rmdir|touch|ls|stat|file|df|du|free|top|ps|pkill|pgrep|nohup|screen|tmux)
            echo "https://man7.org/linux/man-pages/man1/$term.1.html"
            ;;
        *)
            echo "$manual"
            ;;
    esac
}

#==============================================================================
# Get documentation URL for a term based on detected language
+#============================================================================== +get_doc_url() { + local term="$1" + local lang="$2" + local import_line="$3" # Optional: full import for context + + # Try offline docs first + local offline_result + offline_result=$(lookup_offline "$term" "$lang" "$import_line") + if [ -n "$offline_result" ]; then + echo "$offline_result" + return 0 + fi + + # For TypeScript, also try JavaScript offline docs (most TS keywords are JS) + if [[ "$lang" == "typescript" || "$lang" == "ts" || "$lang" == "tsx" ]]; then + offline_result=$(lookup_offline "$term" "js" "$import_line") + if [ -n "$offline_result" ]; then + echo "$offline_result" + return 0 + fi + fi + + # Fall back to online URLs + case "$lang" in + python|py) + python_doc_url "$term" + ;; + javascript|js|jsx) + js_doc_url "$term" + ;; + typescript|ts|tsx) + # For TypeScript, try JS doc first (since most keywords are shared) + # Only use TS-specific docs for TS-only features + case "$term" in + interface|type|enum|namespace|declare|readonly|abstract|implements|keyof|infer|as|is|asserts|satisfies|override|Partial|Required|Readonly|Record|Pick|Omit|Exclude|Extract|NonNullable|ReturnType|Parameters|InstanceType|Awaited) + ts_doc_url "$term" + ;; + *) + js_doc_url "$term" + ;; + esac + ;; + c) + c_doc_url "$term" + ;; + cpp|c++|cc|cxx) + cpp_doc_url "$term" + ;; + rust|rs) + rust_doc_url "$term" + ;; + go) + go_doc_url "$term" + ;; + ruby|rb) + ruby_doc_url "$term" + ;; + java) + java_doc_url "$term" + ;; + shell|bash|sh) + shell_doc_url "$term" + ;; + *) + echo "https://devdocs.io/#q=$term" + ;; + esac +} + +#============================================================================== +# Detect primary language from results +#============================================================================== +detect_language() { + if [ -f "$RESULTS_DIR/tokei_stats.txt" ]; then + # Parse tokei output to find most used language + grep -E "^\s+(Python|JavaScript|TypeScript|C\+\+|C 
|Rust|Go|Ruby|Java|Shell)" "$RESULTS_DIR/tokei_stats.txt" 2>/dev/null \ + | head -1 \ + | awk '{print tolower($1)}' \ + | sed 's/c++/cpp/' + else + echo "unknown" + fi +} + +#============================================================================== +# Main Processing +#============================================================================== + +# Check if results directory exists +if [ ! -d "$RESULTS_DIR" ]; then + echo -e "${RED}Error: Results directory not found: $RESULTS_DIR${NC}" + echo "Run analyze_repo.sh first to generate analysis results." + exit 1 +fi + +# Detect or use specified language +if [ "$LANGUAGES" = "auto" ]; then + PRIMARY_LANG=$(detect_language) + echo -e "${BLUE}Detected primary language: ${GREEN}$PRIMARY_LANG${NC}" +else + PRIMARY_LANG=$(echo "$LANGUAGES" | cut -d',' -f1) + echo -e "${BLUE}Using specified language: ${GREEN}$PRIMARY_LANG${NC}" +fi + +echo "" +echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}" +echo -e "${GREEN} Generating Study Materials${NC}" +echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}" +echo "" +# Patch for generate_study_materials.sh - use per-language files + +#============================================================================== +# Generate Documentation Links (Markdown) +#============================================================================== +echo -e "${YELLOW}Generating documentation links...${NC}" + +cat > "$DOCS_FILE" << 'EOF' +# Documentation Links for Code Review + +This document contains links to official documentation for the most commonly used +functions, keywords, and patterns found in the analyzed codebase. + +**Note:** Items are grouped by language for accurate documentation links. 
+ +--- + +EOF + +# Check for per-language files +PER_LANG_DIR="$RESULTS_DIR/per_language" + +if [ -d "$PER_LANG_DIR" ]; then + echo -e "${GREEN}Using per-language analysis files${NC}" + + # Map internal lang names to doc function names + lang_to_doc() { + case "$1" in + c_cpp) echo "cpp" ;; + javascript) echo "js" ;; + typescript) echo "ts" ;; + shell) echo "bash" ;; + *) echo "$1" ;; + esac + } + + # Process keywords by language + echo "## Language Keywords" >> "$DOCS_FILE" + echo "" >> "$DOCS_FILE" + + for keyword_file in "$PER_LANG_DIR"/keywords_*.txt; do + [ ! -f "$keyword_file" ] && continue + [ ! -s "$keyword_file" ] && continue + + # Extract language name from filename + lang=$(basename "$keyword_file" | sed 's/keywords_//; s/\.txt//') + doc_lang=$(lang_to_doc "$lang") + + # Format language name for display + case "$lang" in + c_cpp) display_lang="C/C++" ;; + javascript) display_lang="JavaScript" ;; + typescript) display_lang="TypeScript" ;; + python) display_lang="Python" ;; + rust) display_lang="Rust" ;; + go) display_lang="Go" ;; + ruby) display_lang="Ruby" ;; + java) display_lang="Java" ;; + shell) display_lang="Shell/Bash" ;; + *) display_lang="$lang" ;; + esac + + echo "### $display_lang Keywords" >> "$DOCS_FILE" + echo "" >> "$DOCS_FILE" + echo "| Keyword | Count | Documentation |" >> "$DOCS_FILE" + echo "|---------|-------|---------------|" >> "$DOCS_FILE" + + head -$TOP_N "$keyword_file" | while read -r count term; do + [ -z "$term" ] && continue + [[ "$term" =~ ^[#] ]] && continue # Skip comment lines + url=$(get_doc_url "$term" "$doc_lang") + echo "| \`$term\` | $count | [docs]($url) |" >> "$DOCS_FILE" + done + echo "" >> "$DOCS_FILE" + done + + # Process functions by language + echo "## Function/Method Calls" >> "$DOCS_FILE" + echo "" >> "$DOCS_FILE" + + for func_file in "$PER_LANG_DIR"/functions_*.txt; do + [ ! -f "$func_file" ] && continue + [ ! 
-s "$func_file" ] && continue + + lang=$(basename "$func_file" | sed 's/functions_//; s/\.txt//') + doc_lang=$(lang_to_doc "$lang") + + case "$lang" in + c_cpp) display_lang="C/C++" ;; + javascript) display_lang="JavaScript" ;; + typescript) display_lang="TypeScript" ;; + python) display_lang="Python" ;; + rust) display_lang="Rust" ;; + go) display_lang="Go" ;; + ruby) display_lang="Ruby" ;; + java) display_lang="Java" ;; + shell) display_lang="Shell/Bash" ;; + *) display_lang="$lang" ;; + esac + + echo "### $display_lang Functions" >> "$DOCS_FILE" + echo "" >> "$DOCS_FILE" + echo "| Function | Count | Documentation |" >> "$DOCS_FILE" + echo "|----------|-------|---------------|" >> "$DOCS_FILE" + + head -$TOP_N "$func_file" | while read -r count term; do + [ -z "$term" ] && continue + [[ "$term" =~ ^(if|for|while|switch|catch|elif)$ ]] && continue + url=$(get_doc_url "$term" "$doc_lang") + echo "| \`$term()\` | $count | [docs]($url) |" >> "$DOCS_FILE" + done + echo "" >> "$DOCS_FILE" + done + + # Process imports by language + echo "## Imports/Includes" >> "$DOCS_FILE" + echo "" >> "$DOCS_FILE" + + for import_file in "$PER_LANG_DIR"/imports_*.txt; do + [ ! -f "$import_file" ] && continue + [ ! 
-s "$import_file" ] && continue + + lang=$(basename "$import_file" | sed 's/imports_//; s/\.txt//') + doc_lang=$(lang_to_doc "$lang") + + case "$lang" in + c_cpp) display_lang="C/C++ (#include)" ;; + javascript) display_lang="JavaScript (import/require)" ;; + typescript) display_lang="TypeScript (import)" ;; + python) display_lang="Python (import/from)" ;; + rust) display_lang="Rust (use)" ;; + go) display_lang="Go (import)" ;; + ruby) display_lang="Ruby (require)" ;; + java) display_lang="Java (import)" ;; + shell) display_lang="Shell (source)" ;; + *) display_lang="$lang" ;; + esac + + echo "### $display_lang" >> "$DOCS_FILE" + echo "" >> "$DOCS_FILE" + echo "| Import | Count | Documentation |" >> "$DOCS_FILE" + echo "|--------|-------|---------------|" >> "$DOCS_FILE" + + head -20 "$import_file" | while read -r count import; do + [ -z "$import" ] && continue + # For offline lookup, pass the full import line for better context + url=$(get_doc_url "" "$doc_lang" "$import") + if [ -z "$url" ] || [[ "$url" == *"search.html"* ]]; then + # Fallback: extract module and try again + module=$(echo "$import" | sed -E 's/.*[<"]([^">]+)[">].*/\1/' | sed 's|.*/||' | sed 's/\..*$//') + url=$(get_doc_url "$module" "$doc_lang") + fi + import_escaped=$(echo "$import" | sed 's/|/\\|/g') + echo "| \`$import_escaped\` | $count | [docs]($url) |" >> "$DOCS_FILE" + done + echo "" >> "$DOCS_FILE" + done + +else + # Fallback to combined files (old behavior) + echo -e "${YELLOW}No per-language files found, using combined analysis${NC}" + + if [ -f "$RESULTS_DIR/grep_keywords.txt" ]; then + echo "## Language Keywords" >> "$DOCS_FILE" + echo "" >> "$DOCS_FILE" + echo "| Keyword | Count | Documentation |" >> "$DOCS_FILE" + echo "|---------|-------|---------------|" >> "$DOCS_FILE" + + head -$TOP_N "$RESULTS_DIR/grep_keywords.txt" | while read -r count term; do + [ -z "$term" ] && continue + url=$(get_doc_url "$term" "$PRIMARY_LANG") + echo "| \`$term\` | $count | [docs]($url) |" >> 
"$DOCS_FILE" + done + echo "" >> "$DOCS_FILE" + fi + + if [ -f "$RESULTS_DIR/grep_function_calls.txt" ]; then + echo "## Function/Method Calls" >> "$DOCS_FILE" + echo "" >> "$DOCS_FILE" + echo "| Function | Count | Documentation |" >> "$DOCS_FILE" + echo "|----------|-------|---------------|" >> "$DOCS_FILE" + + head -$TOP_N "$RESULTS_DIR/grep_function_calls.txt" | while read -r count term; do + [ -z "$term" ] && continue + [[ "$term" =~ ^(if|for|while|switch|catch)$ ]] && continue + url=$(get_doc_url "$term" "$PRIMARY_LANG") + echo "| \`$term()\` | $count | [docs]($url) |" >> "$DOCS_FILE" + done + echo "" >> "$DOCS_FILE" + fi + + if [ -f "$RESULTS_DIR/grep_imports.txt" ]; then + echo "## Imports/Includes" >> "$DOCS_FILE" + echo "" >> "$DOCS_FILE" + echo "| Import | Count | Documentation |" >> "$DOCS_FILE" + echo "|--------|-------|---------------|" >> "$DOCS_FILE" + + head -20 "$RESULTS_DIR/grep_imports.txt" | while read -r count import; do + [ -z "$import" ] && continue + module=$(echo "$import" | sed -E 's/.*[<"]([^">]+)[">].*/\1/' | sed 's|.*/||' | sed 's/\..*$//') + url=$(get_doc_url "$module" "$PRIMARY_LANG") + import_escaped=$(echo "$import" | sed 's/|/\\|/g') + echo "| \`$import_escaped\` | $count | [docs]($url) |" >> "$DOCS_FILE" + done + echo "" >> "$DOCS_FILE" + fi +fi + +echo "" >> "$DOCS_FILE" +echo "---" >> "$DOCS_FILE" +echo "*Generated by analyze_repo.sh + generate_study_materials.sh*" >> "$DOCS_FILE" + +echo -e "${GREEN}Created: $DOCS_FILE${NC}" +#============================================================================== +# Generate Anki Cards (Tab-separated for import) +#============================================================================== +echo -e "${YELLOW}Generating Anki cards...${NC}" + +cat > "$ANKI_FILE" << 'EOF' +# Anki Import File +# Format: FrontBackTags +# Import with: File -> Import, select "Fields separated by: Tab" +# +# Card Types: +# 1. "What does X do?" - For functions/methods +# 2. "When to use X?" 
- For keywords/patterns +# 3. "What is the syntax for X?" - For language constructs +# +EOF + +# Generate cards for top keywords +if [ -f "$RESULTS_DIR/grep_keywords.txt" ]; then + echo "# Keywords" >> "$ANKI_FILE" + head -$TOP_N "$RESULTS_DIR/grep_keywords.txt" | while read -r count term; do + [ -z "$term" ] && continue + url=$(get_doc_url "$term" "$PRIMARY_LANG") + + # Create different card types based on term type + case "$term" in + if|else|elif|elseif|switch|case|match) + echo -e "What is the purpose of \`$term\` in $PRIMARY_LANG?\tConditional control flow - executes code based on boolean conditions. See: $url\t${PRIMARY_LANG}::keywords::control-flow" >> "$ANKI_FILE" + ;; + for|while|loop|do|until) + echo -e "What is the purpose of \`$term\` in $PRIMARY_LANG?\tLoop construct - repeats code execution. See: $url\t${PRIMARY_LANG}::keywords::loops" >> "$ANKI_FILE" + ;; + try|except|catch|finally|raise|throw) + echo -e "What is the purpose of \`$term\` in $PRIMARY_LANG?\tException handling - manages errors and exceptional conditions. See: $url\t${PRIMARY_LANG}::keywords::exceptions" >> "$ANKI_FILE" + ;; + class|struct|interface|trait|impl) + echo -e "What is the purpose of \`$term\` in $PRIMARY_LANG?\tType definition - defines custom data structures. See: $url\t${PRIMARY_LANG}::keywords::types" >> "$ANKI_FILE" + ;; + def|fn|func|function) + echo -e "What is the purpose of \`$term\` in $PRIMARY_LANG?\tFunction definition - declares a reusable block of code. See: $url\t${PRIMARY_LANG}::keywords::functions" >> "$ANKI_FILE" + ;; + import|from|use|require|include) + echo -e "What is the purpose of \`$term\` in $PRIMARY_LANG?\tModule import - brings external code into current scope. See: $url\t${PRIMARY_LANG}::keywords::modules" >> "$ANKI_FILE" + ;; + async|await|yield) + echo -e "What is the purpose of \`$term\` in $PRIMARY_LANG?\tAsynchronous programming - handles concurrent operations. 
See: $url\t${PRIMARY_LANG}::keywords::async" >> "$ANKI_FILE" + ;; + *) + echo -e "What does the keyword \`$term\` do in $PRIMARY_LANG?\t[FILL: Look up at $url]\t${PRIMARY_LANG}::keywords" >> "$ANKI_FILE" + ;; + esac + done +fi + +# Generate cards for top functions +if [ -f "$RESULTS_DIR/grep_function_calls.txt" ]; then + echo "" >> "$ANKI_FILE" + echo "# Functions" >> "$ANKI_FILE" + head -$TOP_N "$RESULTS_DIR/grep_function_calls.txt" | while read -r count term; do + [ -z "$term" ] && continue + [[ "$term" =~ ^(if|for|while|switch|catch)$ ]] && continue + url=$(get_doc_url "$term" "$PRIMARY_LANG") + + echo -e "What does \`$term()\` do in $PRIMARY_LANG? (Used $count times)\t[FILL: Look up at $url]\t${PRIMARY_LANG}::functions" >> "$ANKI_FILE" + done +fi + +echo -e "${GREEN}Created: $ANKI_FILE${NC}" + +#============================================================================== +# Generate LLM Prompt for Anki Card Generation +#============================================================================== +echo -e "${YELLOW}Generating LLM prompt...${NC}" + +# Helper function to get doc link for a term +get_llm_doc_link() { + local term="$1" + local lang="$2" + local is_import="$3" # "true" if it's an import line + + # Check if it's an internal/project-specific item + if [[ "$term" =~ ^@/ ]] || [[ "$term" =~ ^\./ ]] || [[ "$term" =~ ^app\. 
]] || [[ "$term" =~ ^src/ ]] || [[ "$term" =~ from\ \'@/ ]] || [[ "$term" =~ from\ \'\./ ]]; then + echo "[INTERNAL - SKIP]" + return + fi + + # Try offline lookup + local offline_result + if [ "$is_import" = "true" ]; then + offline_result=$("$LOOKUP_SCRIPT" --import "$term" "$lang" 2>/dev/null | grep "^/" | head -1) + else + offline_result=$("$LOOKUP_SCRIPT" "$term" "$lang" 2>/dev/null | grep "^File:" | head -1 | sed 's/^File: //') + fi + + if [ -n "$offline_result" ]; then + echo "$offline_result" + else + echo "[NO OFFLINE DOC]" + fi +} + +# Generate keywords with doc links +# Prints one "<count> <keyword> → <doc link>" row for each of the first $TOP_N +# lines of grep_keywords.txt (lines starting with '#' are dropped). +generate_keywords_with_docs() { + local keywords_file="$RESULTS_DIR/grep_keywords.txt" + [ ! -f "$keywords_file" ] && echo "No keywords found" && return + + # Let read split the "<count> <keyword>" fields instead of forking echo|awk twice + # per row; declare first so 'local' cannot mask the command-substitution status. + local count keyword doc_link + head -n "$TOP_N" "$keywords_file" | grep -v '^#' | while read -r count keyword _; do + [ -z "$keyword" ] && continue + doc_link=$(get_llm_doc_link "$keyword" "$PRIMARY_LANG" "false") + echo "$count $keyword → $doc_link" + done +} + +# Generate functions with doc links +# Same row format as generate_keywords_with_docs, with "()" appended to the name. +generate_functions_with_docs() { + local functions_file="$RESULTS_DIR/grep_function_calls.txt" + [ ! -f "$functions_file" ] && echo "No functions found" && return + + local count func doc_link + head -n "$TOP_N" "$functions_file" | grep -v '^#' | while read -r count func _; do + # Skip single-letter functions (minified code) or empty + if [ -z "$func" ] || [ ${#func} -le 1 ]; then + continue + fi + + doc_link=$(get_llm_doc_link "$func" "$PRIMARY_LANG" "false") + echo "$count $func() → $doc_link" + done +} + +# Generate imports with doc links +# Covers the first 20 rows of grep_imports.txt; rows matching the internal-path +# patterns below are printed as [INTERNAL - SKIP] without a lookup. +generate_imports_with_docs() { + local imports_file="$RESULTS_DIR/grep_imports.txt" + [ ! 
-f "$imports_file" ] && echo "No imports found" && return + + # read puts the count in the first variable and the rest of the row (the import + # statement, inner spaces included) in the second. A row with no statement now + # yields an empty import_stmt and is skipped; 'cut -d" " -f2-' passed such a + # delimiter-less row through unchanged, so the bare count was treated as an import. + local count import_stmt doc_link + head -20 "$imports_file" | grep -v '^#' | while read -r count import_stmt; do + [ -z "$import_stmt" ] && continue + + # Check if internal import + if [[ "$import_stmt" =~ @/ ]] || [[ "$import_stmt" =~ \'\./ ]] || [[ "$import_stmt" =~ from\ app\. ]] || [[ "$import_stmt" =~ from\ src\. ]]; then + echo "$count $import_stmt → [INTERNAL - SKIP]" + else + doc_link=$(get_llm_doc_link "$import_stmt" "$PRIMARY_LANG" "true") + echo "$count $import_stmt → $doc_link" + fi + done +} + +# The quoted delimiter disables expansion, so the example path in this header is +# emitted literally; it uses ~ rather than one particular user's home directory. +cat > "$LLM_PROMPT_FILE" << 'PROMPT_HEADER' +# LLM Prompt: Generate Anki Flashcards + +You are creating Anki flashcards from code analysis. + +## CRITICAL INSTRUCTIONS + +1. **READ DOCS VIA TERMINAL** - Use the `cat` command to read each .md file: + ``` + cat ~/.local/share/offline-docs/mdn-content/files/en-us/web/javascript/reference/statements/const/index.md + ``` +2. **DO NOT USE YOUR OWN KNOWLEDGE** - Base flashcards ONLY on the content you read from the files +3. **IF YOU CANNOT READ A FILE** - Report: "ERROR: Cannot read [path]" and skip that item +4. **NEVER FALL BACK TO GENERAL KNOWLEDGE** - If you can't read the file, skip it entirely +5. **READ ONE FILE AT A TIME** - Run cat for each topic before creating its flashcards + +PROMPT_HEADER + +cat >> "$LLM_PROMPT_FILE" << EOF +## Context +- Primary Language: **$PRIMARY_LANG** + +## Top Keywords (by frequency) +Items marked \`[INTERNAL - SKIP]\` are project-specific - skip them. +Items marked \`[NO OFFLINE DOC]\` have no offline documentation - use online docs or skip. +Other items have offline doc paths you can reference. 
+ +\`\`\` +$(generate_keywords_with_docs) +\`\`\` + +## Top Functions/Methods (by frequency) +\`\`\` +$(generate_functions_with_docs) +\`\`\` + +## Top Imports/Includes +\`\`\` +$(generate_imports_with_docs) +\`\`\` +EOF + +cat >> "$LLM_PROMPT_FILE" << 'PROMPT_FOOTER' + +## Guidelines + +**CRITICAL - Keep answers EXTREMELY short:** +- Most answers should be **1-2 words** or **1 sentence** +- It's common and expected for an answer to be just: "Returns an array" or "Immutable" +- 2 sentences = longer answer, 3 sentences = absolute maximum (rare) +- Each flashcard tests ONE atomic piece of knowledge + +**NO DUPLICATES:** +- Before creating a card, check if you already created a similar question +- Each unique fact should appear in EXACTLY ONE card +- Do NOT create multiple cards asking the same thing with slightly different wording + +**What to include:** +- Concept cards: "What is X?" / "What does X do?" +- Syntax cards: "How do you write X?" (brief code snippet) +- Comparison cards: "X vs Y - what's the difference?" + +**What to SKIP (do NOT create cards for):** +- MDN frontmatter fields: title, slug, page-type, browser-compat, spec-urls +- YAML metadata between `---` markers at the start of files +- Any line that looks like metadata (key: value at start of doc) +- Empty answers - if you can't find content for the back, skip the card entirely + +**FINAL CARD FOR EACH TOPIC (EXCEPTION TO SHORT ANSWER RULE):** +- Add EXACTLY ONE full documentation card per topic (no duplicates!) 
+- Question: `[Topic] - Full MDN Documentation` +- Answer: Copy the .md file content STARTING AFTER the `---` frontmatter block +- Skip the YAML frontmatter (everything between the first two `---` lines) +- Do NOT create this card twice for the same topic + +**Skipped items - please review:** +- Items marked `[INTERNAL - SKIP]` are project-specific utilities - I skipped them +- Items marked `[NO OFFLINE DOC]` are third-party libraries without bundled docs +- If you want flashcards for skipped items, tell me which ones to include + +## OUTPUT: CREATE AN ANKI FILE + +**CREATE A FILE DIRECTLY** - Do not just output text. Use your file creation tool to create: + +**File path:** `~/.local/share/study-materials/anki_generated.txt` + +**Format:** Tab-separated values (TSV) with Anki metadata headers: + +``` +#separator:tab +#deck:CodeStudy::JavaScript +#notetype:CodeCard +#columns:Front Back Tags +What does const declare?	Block-scoped variables with immutable bindings.	javascript declarations +``` + +**Required headers at top of file:** +- `#separator:tab` - Specifies tab as delimiter +- `#deck:CodeStudy::[Language]` - Creates deck "CodeStudy" with sub-deck for language (e.g., CodeStudy::JavaScript) +- `#notetype:CodeCard` - Uses custom note type "CodeCard" (Anki will create if doesn't exist) +- `#columns:Front Back Tags` - Column headers (tab-separated) + +**Rules:** +- Use ACTUAL `<code>` tags (not escaped &lt;code&gt;) +- Use `<br>` for line breaks within fields +- Use `<pre>` for code blocks
+- Tags are space-separated
+- Escape any literal tabs within content as spaces
+
+**Example file content:**
+```
+#separator:tab
+#deck:CodeStudy::JavaScript
+#notetype:CodeCard
+#columns:Front	Back	Tags
+What does const declare?	Block-scoped variables with immutable bindings.	javascript declarations
+Can const be reassigned?	No, throws TypeError.	javascript declarations
+const - Full Documentation
[ENTIRE CONTENT OF const/index.md FILE]
javascript declarations full-doc +``` + +**After creating the file**, tell the user: +- File created at: ~/.local/share/study-materials/anki_generated.txt +- Import in Anki: File → Import → select the file +- Deck: CodeStudy::[Language], Note type: CodeCard +--- + +**Important:** +- Process only 5-10 items at a time to maintain quality +- Focus on items with offline documentation paths +- Output ONLY the TSV lines, no extra formatting or markdown +PROMPT_FOOTER + +echo -e "${GREEN}Created: $LLM_PROMPT_FILE${NC}" + + +#============================================================================== +# Summary +#============================================================================== +echo "" +echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}" +echo -e "${GREEN} Study Materials Generated!${NC}" +echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}" +echo "" +echo "Files created:" +echo " 📚 Documentation Links: $DOCS_FILE" +echo " 🎴 Anki Cards: $ANKI_FILE" +echo " 🤖 LLM Prompt: $LLM_PROMPT_FILE" +echo "" +echo "Next steps:" +echo " 1. Review documentation_links.md for learning resources" +echo " 2. Import anki_cards.txt into Anki (File -> Import)" +echo " 3. 
Use llm_anki_prompt.md with ChatGPT/Claude to generate more cards" +echo "" +echo "Anki import settings:" +echo " - Field separator: Tab" +echo " - Allow HTML: Yes" +echo " - Tags are in last field: Yes" diff --git a/scripts/utils/lookup_docs.sh b/scripts/utils/lookup_docs.sh new file mode 100755 index 0000000..b9c715e --- /dev/null +++ b/scripts/utils/lookup_docs.sh @@ -0,0 +1,942 @@ +#!/bin/bash +#============================================================================== +# Offline Documentation Lookup +# Searches downloaded documentation for terms +# +# Usage: ./lookup_docs.sh [language] [--open] [--extract] +# +# Examples: +# ./lookup_docs.sh Path python # Find Path in Python docs +# ./lookup_docs.sh vector c_cpp # Find vector in C++ docs +# ./lookup_docs.sh map # Find map in all languages +# ./lookup_docs.sh --batch imports.txt # Lookup multiple terms from file +#============================================================================== + +set -e + +# Configuration +DOCS_DIR="${OFFLINE_DOCS_DIR:-$HOME/.local/share/offline-docs}" +INDEX_DIR="$DOCS_DIR/.index" + +# Colors - only use if stdout is a terminal +if [ -t 1 ]; then + RED='\033[0;31m' + GREEN='\033[0;32m' + BLUE='\033[0;34m' + YELLOW='\033[1;33m' + CYAN='\033[0;36m' + NC='\033[0m' +else + RED='' + GREEN='' + BLUE='' + YELLOW='' + CYAN='' + NC='' +fi + +#============================================================================== +# Python-specific lookup +#============================================================================== +lookup_python() { + local term="$1" + local in_module="$2" # Optional: look for term within this module + local doc_dir="$DOCS_DIR/python" + local result="" + local desc="" + + # Normalize term (preserve case for True/False/None) + local term_lower + term_lower=$(echo "$term" | tr '[:upper:]' '[:lower:]') + + # If looking for a term within a specific module + if [ -n "$in_module" ]; then + local module_lower + module_lower=$(echo "$in_module" | tr '[:upper:]' 
'[:lower:]') + + if [ -f "$doc_dir/library/${module_lower}.html" ]; then + # Find anchor for the specific item in the module + local anchor + anchor=$(grep -oP "id=\"[^\"]*${term}[^\"]*\"" "$doc_dir/library/${module_lower}.html" 2>/dev/null | head -1 | sed 's/id="//;s/"//') + + if [ -n "$anchor" ]; then + result="$doc_dir/library/${module_lower}.html#$anchor" + desc="Python: $in_module.$term" + else + # Just link to the module + result="$doc_dir/library/${module_lower}.html" + desc="Python: $term in module $in_module" + fi + echo "$result|$desc" + return 0 + fi + fi + + #-------------------------------------------------------------------------- + # PRIORITY 1: Python keywords - map to exact documentation locations + #-------------------------------------------------------------------------- + + # Compound statements (reference/compound_stmts.html) + case "$term_lower" in + if|elif|else) + result="$doc_dir/reference/compound_stmts.html#if" + desc="Python: if statement" + ;; + for) + result="$doc_dir/reference/compound_stmts.html#for" + desc="Python: for statement" + ;; + while) + result="$doc_dir/reference/compound_stmts.html#while" + desc="Python: while statement" + ;; + def) + result="$doc_dir/reference/compound_stmts.html#def" + desc="Python: function definition" + ;; + class) + result="$doc_dir/reference/compound_stmts.html#class" + desc="Python: class definition" + ;; + try|except|finally) + result="$doc_dir/reference/compound_stmts.html#try" + desc="Python: try statement" + ;; + with) + result="$doc_dir/reference/compound_stmts.html#with" + desc="Python: with statement" + ;; + async) + result="$doc_dir/reference/compound_stmts.html#async" + desc="Python: async definition" + ;; + match|case) + result="$doc_dir/reference/compound_stmts.html#match" + desc="Python: match statement" + ;; + esac + + # Simple statements (reference/simple_stmts.html) + if [ -z "$result" ]; then + case "$term_lower" in + return) + result="$doc_dir/reference/simple_stmts.html#return" + 
desc="Python: return statement" + ;; + pass) + result="$doc_dir/reference/simple_stmts.html#pass" + desc="Python: pass statement" + ;; + break) + result="$doc_dir/reference/simple_stmts.html#break" + desc="Python: break statement" + ;; + continue) + result="$doc_dir/reference/simple_stmts.html#continue" + desc="Python: continue statement" + ;; + import|from) + result="$doc_dir/reference/simple_stmts.html#import" + desc="Python: import statement" + ;; + raise) + result="$doc_dir/reference/simple_stmts.html#raise" + desc="Python: raise statement" + ;; + assert) + result="$doc_dir/reference/simple_stmts.html#assert" + desc="Python: assert statement" + ;; + yield) + result="$doc_dir/reference/simple_stmts.html#yield" + desc="Python: yield expression" + ;; + del) + result="$doc_dir/reference/simple_stmts.html#del" + desc="Python: del statement" + ;; + global) + result="$doc_dir/reference/simple_stmts.html#global" + desc="Python: global statement" + ;; + nonlocal) + result="$doc_dir/reference/simple_stmts.html#nonlocal" + desc="Python: nonlocal statement" + ;; + type) + result="$doc_dir/reference/simple_stmts.html#type" + desc="Python: type alias statement" + ;; + esac + fi + + # Expressions/operators (reference/expressions.html) + if [ -z "$result" ]; then + case "$term_lower" in + and) + result="$doc_dir/reference/expressions.html#and" + desc="Python: and operator" + ;; + or) + result="$doc_dir/reference/expressions.html#or" + desc="Python: or operator" + ;; + not) + result="$doc_dir/reference/expressions.html#not" + desc="Python: not operator" + ;; + in) + result="$doc_dir/reference/expressions.html#in" + desc="Python: in operator" + ;; + is) + result="$doc_dir/reference/expressions.html#is" + desc="Python: is operator" + ;; + lambda) + result="$doc_dir/reference/expressions.html#lambda" + desc="Python: lambda expression" + ;; + await) + result="$doc_dir/reference/expressions.html#await" + desc="Python: await expression" + ;; + esac + fi + + # Built-in constants 
(library/constants.html) - case-sensitive! + if [ -z "$result" ]; then + case "$term" in + True|False) + result="$doc_dir/library/constants.html#$term" + desc="Python: $term constant" + ;; + None) + result="$doc_dir/library/constants.html#None" + desc="Python: None constant" + ;; + Ellipsis) + result="$doc_dir/library/constants.html#Ellipsis" + desc="Python: Ellipsis constant" + ;; + NotImplemented) + result="$doc_dir/library/constants.html#NotImplemented" + desc="Python: NotImplemented constant" + ;; + esac + fi + + # Verify file exists for keyword lookups + if [ -n "$result" ] && [ ! -f "${result%%#*}" ]; then + result="" + desc="" + fi + + #-------------------------------------------------------------------------- + # PRIORITY 2: Check if it's a module (pathlib, os, sys, etc.) + #-------------------------------------------------------------------------- + if [ -z "$result" ] && [ -f "$doc_dir/library/${term_lower}.html" ]; then + result="$doc_dir/library/${term_lower}.html" + desc="Python module: $term" + fi + + #-------------------------------------------------------------------------- + # PRIORITY 3: Built-in functions (library/functions.html) + #-------------------------------------------------------------------------- + if [ -z "$result" ] && [ -f "$doc_dir/library/functions.html" ]; then + if grep -q "id=\"$term_lower\"" "$doc_dir/library/functions.html" 2>/dev/null; then + result="$doc_dir/library/functions.html#$term_lower" + desc="Python built-in function: $term" + fi + fi + + #-------------------------------------------------------------------------- + # PRIORITY 4: Built-in types (library/stdtypes.html) + #-------------------------------------------------------------------------- + if [ -z "$result" ]; then + case "$term_lower" in + str|string) + result="$doc_dir/library/stdtypes.html#str" + desc="Python: str type" + ;; + int|integer) + result="$doc_dir/library/stdtypes.html#int" + desc="Python: int type" + ;; + float) + 
result="$doc_dir/library/stdtypes.html#float" + desc="Python: float type" + ;; + list) + result="$doc_dir/library/stdtypes.html#list" + desc="Python: list type" + ;; + dict|dictionary) + result="$doc_dir/library/stdtypes.html#dict" + desc="Python: dict type" + ;; + set) + result="$doc_dir/library/stdtypes.html#set" + desc="Python: set type" + ;; + tuple) + result="$doc_dir/library/stdtypes.html#tuple" + desc="Python: tuple type" + ;; + bool|boolean) + result="$doc_dir/library/stdtypes.html#boolean-values" + desc="Python: bool type" + ;; + bytes) + result="$doc_dir/library/stdtypes.html#bytes" + desc="Python: bytes type" + ;; + esac + fi + + #-------------------------------------------------------------------------- + # PRIORITY 5: Check for class/function in module docs (exact id match) + #-------------------------------------------------------------------------- + if [ -z "$result" ]; then + local found_in + # Look for exact id match first + found_in=$(grep -l "id=\"$term\"" "$doc_dir/library/"*.html 2>/dev/null | head -1) + if [ -n "$found_in" ]; then + result="$found_in#$term" + local module + module=$(basename "$found_in" .html) + desc="Python: $term in module $module" + fi + fi + + #-------------------------------------------------------------------------- + # PRIORITY 6: Search in index + #-------------------------------------------------------------------------- + if [ -z "$result" ] && [ -f "$INDEX_DIR/python_index.txt" ]; then + local index_match + index_match=$(grep -i "^$term " "$INDEX_DIR/python_index.txt" 2>/dev/null | head -1) + if [ -n "$index_match" ]; then + result=$(echo "$index_match" | cut -d' ' -f2-) + desc="Python: $term (from index)" + fi + fi + + # NO full-text search fallback - it produces garbage results + # If we can't find a specific doc, return nothing (will fall back to online) + + if [ -n "$result" ]; then + echo "$result|$desc" + fi +} + +#============================================================================== +# C/C++ 
specific lookup +#============================================================================== +lookup_cpp() { + local term="$1" + local doc_dir="$DOCS_DIR/c_cpp" + local result="" + local desc="" + + # Resolve symlink if present (system package installs to c_cpp/system/) + [ -L "$doc_dir/system" ] && doc_dir="$doc_dir/system" + + # Common C headers + case "$term" in + stdio.h|stdio) + [ -f "$doc_dir/reference/cstdio/index.html" ] && result="$doc_dir/reference/cstdio/index.html" + [ -f "$doc_dir/en/c/io.html" ] && result="$doc_dir/en/c/io.html" + desc="C standard I/O header" + ;; + stdlib.h|stdlib) + [ -f "$doc_dir/reference/cstdlib/index.html" ] && result="$doc_dir/reference/cstdlib/index.html" + [ -f "$doc_dir/en/c/memory.html" ] && result="$doc_dir/en/c/memory.html" + desc="C standard library header" + ;; + string.h|cstring) + [ -f "$doc_dir/reference/cstring/index.html" ] && result="$doc_dir/reference/cstring/index.html" + desc="C string handling header" + ;; + math.h|cmath) + [ -f "$doc_dir/reference/cmath/index.html" ] && result="$doc_dir/reference/cmath/index.html" + desc="C math header" + ;; + esac + + # C++ STL containers + case "$term" in + vector) + [ -f "$doc_dir/reference/vector/index.html" ] && result="$doc_dir/reference/vector/index.html" + [ -f "$doc_dir/en/cpp/container/vector.html" ] && result="$doc_dir/en/cpp/container/vector.html" + desc="C++ std::vector container" + ;; + map) + [ -f "$doc_dir/reference/map/index.html" ] && result="$doc_dir/reference/map/index.html" + desc="C++ std::map container" + ;; + string) + [ -f "$doc_dir/reference/string/index.html" ] && result="$doc_dir/reference/string/index.html" + desc="C++ std::string" + ;; + iostream) + [ -f "$doc_dir/reference/iostream/index.html" ] && result="$doc_dir/reference/iostream/index.html" + desc="C++ iostream header" + ;; + esac + + # C keywords + case "$term" in + if|else|for|while|do|switch|case|break|continue|return|goto) + [ -f "$doc_dir/en/c/language/$term.html" ] && 
result="$doc_dir/en/c/language/$term.html" + [ -f "$doc_dir/en/cpp/language/$term.html" ] && result="$doc_dir/en/cpp/language/$term.html" + desc="C/C++ keyword: $term" + ;; + int|char|float|double|void|long|short|unsigned|signed) + [ -f "$doc_dir/en/c/language/type.html" ] && result="$doc_dir/en/c/language/type.html" + desc="C/C++ type: $term" + ;; + struct|union|enum|typedef) + [ -f "$doc_dir/en/c/language/$term.html" ] && result="$doc_dir/en/c/language/$term.html" + desc="C/C++ keyword: $term" + ;; + esac + + # Search in files if not found (use -L to follow symlinks) + if [ -z "$result" ]; then + local found + found=$(find -L "$doc_dir" -name "*${term}*" -type f 2>/dev/null | head -1) + if [ -n "$found" ]; then + result="$found" + desc="C/C++: $term" + fi + fi + + if [ -n "$result" ]; then + echo "$result|$desc" + fi +} + +#============================================================================== +# JavaScript/MDN specific lookup +# Searches the cloned MDN content repository +#============================================================================== +lookup_js() { + local term="$1" + local mdn_dir="$DOCS_DIR/mdn-content/files/en-us" + + # Normalize term for searching + local term_lower + term_lower=$(echo "$term" | tr '[:upper:]' '[:lower:]') + + # Handle common statement aliases (MDN uses if...else, try...catch, etc.) 
+ local statement_aliases=( + "if:if...else" + "else:if...else" + "try:try...catch" + "catch:try...catch" + "finally:try...catch" + "do:do...while" + "while:while" + "for:for" + "switch:switch" + "case:switch" + "default:switch" + ) + + for alias in "${statement_aliases[@]}"; do + local key="${alias%%:*}" + local value="${alias##*:}" + if [ "$term_lower" = "$key" ]; then + local stmt_dir="$mdn_dir/web/javascript/reference/statements/$value" + if [ -d "$stmt_dir" ] && [ -f "$stmt_dir/index.md" ]; then + local title + title=$(grep -m1 "^title:" "$stmt_dir/index.md" 2>/dev/null | sed 's/^title:\s*//' | tr -d '"') + echo "$stmt_dir/index.md|${title:-$term}" + return 0 + fi + fi + done + + # Handle boolean/null literals + case "$term_lower" in + true|false) + local bool_dir="$mdn_dir/web/javascript/reference/global_objects/boolean" + if [ -d "$bool_dir" ] && [ -f "$bool_dir/index.md" ]; then + echo "$bool_dir/index.md|Boolean ($term)" + return 0 + fi + ;; + null) + local null_dir="$mdn_dir/web/javascript/reference/operators/null" + if [ -d "$null_dir" ] && [ -f "$null_dir/index.md" ]; then + local title + title=$(grep -m1 "^title:" "$null_dir/index.md" 2>/dev/null | sed 's/^title:\s*//' | tr -d '"') + echo "$null_dir/index.md|${title:-null}" + return 0 + fi + ;; + undefined) + local undef_dir="$mdn_dir/web/javascript/reference/global_objects/undefined" + if [ -d "$undef_dir" ] && [ -f "$undef_dir/index.md" ]; then + local title + title=$(grep -m1 "^title:" "$undef_dir/index.md" 2>/dev/null | sed 's/^title:\s*//' | tr -d '"') + echo "$undef_dir/index.md|${title:-undefined}" + return 0 + fi + ;; + esac + + # Search JavaScript reference directory structure (priority order) + local search_dirs=( + "$mdn_dir/web/javascript/reference/statements" + "$mdn_dir/web/javascript/reference/operators" + "$mdn_dir/web/javascript/reference/global_objects" + "$mdn_dir/web/javascript/reference/functions" + "$mdn_dir/web/javascript/reference/classes" + ) + + for search_dir in 
"${search_dirs[@]}"; do + if [ -d "$search_dir" ]; then + # Look for exact directory match (MDN uses directories with index.md) + local found_dir + found_dir=$(find "$search_dir" -maxdepth 2 -type d -iname "$term" 2>/dev/null | head -1) + if [ -n "$found_dir" ] && [ -f "$found_dir/index.md" ]; then + local title + title=$(grep -m1 "^title:" "$found_dir/index.md" 2>/dev/null | sed 's/^title:\s*//' | tr -d '"') + echo "$found_dir/index.md|${title:-$term}" + return 0 + fi + fi + done + + # Search Web APIs - prioritize *_api directories for common terms + if [ -d "$mdn_dir/web/api" ]; then + # First try _api directory (e.g., fetch_api, console_api) + local api_dir="$mdn_dir/web/api/${term_lower}_api" + if [ -d "$api_dir" ] && [ -f "$api_dir/index.md" ]; then + local title + title=$(grep -m1 "^title:" "$api_dir/index.md" 2>/dev/null | sed 's/^title:\s*//' | tr -d '"') + echo "$api_dir/index.md|${title:-$term API}" + return 0 + fi + + # Then try exact top-level API interface (e.g., Console, Document, Element) + local found + found=$(find "$mdn_dir/web/api" -maxdepth 1 -type d -iname "$term" 2>/dev/null | head -1) + if [ -n "$found" ] && [ -f "$found/index.md" ]; then + local title + title=$(grep -m1 "^title:" "$found/index.md" 2>/dev/null | sed 's/^title:\s*//' | tr -d '"') + echo "$found/index.md|${title:-$term}" + return 0 + fi + + # Try window/ for global functions like alert, confirm, etc. 
+ local window_method="$mdn_dir/web/api/window/${term_lower}" + if [ -d "$window_method" ] && [ -f "$window_method/index.md" ]; then + local title + title=$(grep -m1 "^title:" "$window_method/index.md" 2>/dev/null | sed 's/^title:\s*//' | tr -d '"') + echo "$window_method/index.md|${title:-Window.$term()}" + return 0 + fi + + # Search nested API methods + found=$(find "$mdn_dir/web/api" -maxdepth 3 -type d -iname "$term" 2>/dev/null | head -1) + if [ -n "$found" ] && [ -f "$found/index.md" ]; then + local title + title=$(grep -m1 "^title:" "$found/index.md" 2>/dev/null | sed 's/^title:\s*//' | tr -d '"') + echo "$found/index.md|${title:-$term}" + return 0 + fi + fi + + # Now try partial matches in Global Objects (e.g., Array.from, Object.keys) + if [ -d "$mdn_dir/web/javascript/reference/global_objects" ]; then + local found + found=$(find "$mdn_dir/web/javascript/reference/global_objects" -maxdepth 2 -type d -iname "*${term}*" 2>/dev/null | head -1) + if [ -n "$found" ] && [ -f "$found/index.md" ]; then + local title + title=$(grep -m1 "^title:" "$found/index.md" 2>/dev/null | sed 's/^title:\s*//' | tr -d '"') + echo "$found/index.md|${title:-$term}" + return 0 + fi + fi + + # Glossary as last resort + if [ -d "$mdn_dir/glossary" ]; then + local found + found=$(find "$mdn_dir/glossary" -maxdepth 1 -type d -iname "$term" 2>/dev/null | head -1) + if [ -n "$found" ] && [ -f "$found/index.md" ]; then + local title + title=$(grep -m1 "^title:" "$found/index.md" 2>/dev/null | sed 's/^title:\s*//' | tr -d '"') + echo "$found/index.md|${title:-$term}" + return 0 + fi + fi + + return 1 +} + +#============================================================================== +# Rust specific lookup +#============================================================================== +lookup_rust() { + local term="$1" + local result="" + local desc="" + + if command -v rustup &>/dev/null; then + # Use rustup doc to get path + local rust_doc_path + rust_doc_path=$(rustup doc --path 
2>/dev/null | head -1 | xargs dirname 2>/dev/null) + + # Search in std docs + if [ -d "$rust_doc_path/std" ]; then + local found + found=$(find "$rust_doc_path/std" -name "*${term}*" -type f 2>/dev/null | head -1) + if [ -n "$found" ]; then + result="$found" + desc="Rust: $term" + fi + fi + fi + + if [ -n "$result" ]; then + echo "$result|$desc" + fi +} + +#============================================================================== +# Go specific lookup +#============================================================================== +lookup_go() { + local term="$1" + local result="" + local desc="" + + if command -v go &>/dev/null; then + # Check if it's a stdlib package + if go doc "$term" &>/dev/null; then + result="go doc $term" + desc="Go package: $term (use 'go doc $term' to view)" + fi + fi + + if [ -n "$result" ]; then + echo "$result|$desc" + fi +} + +#============================================================================== +# Shell specific lookup +# Tries bash_builtins.txt, then common_commands.txt, then 'man -w'; prints +# "<result>|<description>" for the first hit and nothing when all three miss. +#============================================================================== +lookup_shell() { + local term="$1" + local doc_dir="$DOCS_DIR/shell" + local result="" + local desc="" + + # Check bash builtins (-F: builtin names such as '.' and '[' must match literally, not as regex) + if [ -f "$doc_dir/bash_builtins.txt" ]; then + if grep -qF -- "=== $term ===" "$doc_dir/bash_builtins.txt" 2>/dev/null; then + result="$doc_dir/bash_builtins.txt" + desc="Bash builtin: $term" + fi + fi + + # Check common commands + # NOTE(review): "^$term" is a prefix regex, so 'ls' also matches an 'lsblk' row; + # confirm the row format of common_commands.txt before tightening the match. + if [ -z "$result" ] && [ -f "$doc_dir/common_commands.txt" ]; then + if grep -q "^$term" "$doc_dir/common_commands.txt" 2>/dev/null; then + local cmd_desc + cmd_desc=$(grep "^$term" "$doc_dir/common_commands.txt" | head -1) + result="$doc_dir/common_commands.txt" + desc="Shell command: $cmd_desc" + fi + fi + + # Try man page + if [ -z "$result" ]; then + local man_path + # 'man -w' exits non-zero when no page exists; under 'set -e' a bare assignment + # would terminate the script instead of reporting "not found". + man_path=$(man -w "$term" 2>/dev/null) || man_path="" + if [ -n "$man_path" ]; then + result="man $term" + desc="Manual page: $term (use 'man $term' to view)" + fi + fi + + if [ -n "$result" ]; then + echo 
"$result|$desc" + fi +} + +#============================================================================== +# Generic lookup (searches all languages) +# Prints "<lang>: <result>|<description>" for every language whose lookup hits. +#============================================================================== +lookup_all() { + local term="$1" + local lang result + + # Try each language + for lang in python cpp js rust go shell; do + # lookup_js returns 1 on a miss; with 'set -e' active a bare assignment would + # terminate the script here instead of moving on to the next language. + result=$(lookup_$lang "$term" 2>/dev/null) || true + if [ -n "$result" ]; then + echo "$lang: $result" + fi + done +} + +#============================================================================== +# Parse Python import and lookup the actual imported item +#============================================================================== +parse_python_import() { + local import_line="$1" + + # Handle "from X import Y" format + if [[ "$import_line" =~ ^from[[:space:]]+([^[:space:]]+)[[:space:]]+import[[:space:]]+(.+) ]]; then + local module="${BASH_REMATCH[1]}" + local items="${BASH_REMATCH[2]}" + + # Clean up items (remove parentheses, commas, etc.) + items=$(echo "$items" | sed 's/[(),]//g' | awk '{print $1}') + + # Output: module and first imported item + echo "$module|$items" + return 0 + fi + + # Handle "import X" format + if [[ "$import_line" =~ ^import[[:space:]]+([^[:space:],]+) ]]; then + local module="${BASH_REMATCH[1]}" + echo "$module|" + return 0 + fi + + return 1 +} + +#============================================================================== +# Smart lookup for imports +#============================================================================== +lookup_import() { + local import_line="$1" + local lang="$2" + + case "$lang" in + python) + local parsed + parsed=$(parse_python_import "$import_line") + if [ -n "$parsed" ]; then + local module item + module=$(echo "$parsed" | cut -d'|' -f1) + item=$(echo "$parsed" | cut -d'|' -f2) + + # For "from X import Y", look up Y within module X's documentation + if [ -n "$item" ] && [ -n "$module" ]; then + local result + # Pass both item and module to lookup_python + 
result=$(lookup_python "$item" "$module") + if [ -n "$result" ]; then + echo "$result" + return 0 + fi + fi + + # Fall back to module documentation + lookup_python "$module" + fi + ;; + + c_cpp) + # Extract header name from #include
or #include "header" + local header + header=$(echo "$import_line" | sed -E 's/#include\s*[<"]([^">]+)[">]/\1/' | sed 's/\.h$//') + lookup_cpp "$header" + ;; + + javascript|typescript) + # Extract module from import/require + local module="" + # Match: from "module" or from 'module' + module=$(echo "$import_line" | grep -oP "from\s+['\"]\\K[^'\"]+") + if [ -z "$module" ]; then + # Match: require("module") or require('module') + module=$(echo "$import_line" | grep -oP "require\\(['\"]\\K[^'\"]+") + fi + [ -n "$module" ] && lookup_js "$module" + ;; + + *) + echo "Unknown language: $lang" + ;; + esac +} + +#============================================================================== +# Extract documentation content +#============================================================================== +extract_doc_content() { + local file="$1" + local term="$2" + local max_lines="${3:-20}" + + if [[ "$file" == *.html ]]; then + # Extract text from HTML, find section about term + if command -v html2text &>/dev/null; then + html2text "$file" 2>/dev/null | grep -A"$max_lines" -i "$term" | head -"$max_lines" + elif command -v lynx &>/dev/null; then + lynx -dump -nolist "$file" 2>/dev/null | grep -A"$max_lines" -i "$term" | head -"$max_lines" + else + # Basic extraction + sed 's/<[^>]*>//g' "$file" | grep -A"$max_lines" -i "$term" | head -"$max_lines" + fi + elif [[ "$file" == *.json ]]; then + # Pretty print JSON section + grep -A5 "\"$term\"" "$file" 2>/dev/null + else + # Plain text + grep -A"$max_lines" -i "$term" "$file" | head -"$max_lines" + fi +} + +#============================================================================== +# Main +#============================================================================== +usage() { + cat << EOF +Usage: $0 [language] [options] + +Search offline documentation for a term. 
+ +Languages: python, cpp, c_cpp, js, javascript, rust, go, shell, all + +Options: + --open Open the documentation file (requires xdg-open) + --extract Extract and display relevant content + --import Parse and lookup an import statement + --batch Process multiple terms from a file + +Examples: + $0 Path python # Find Path in Python docs + $0 vector cpp # Find vector in C++ docs + $0 map # Find map in all languages + $0 --import "from pathlib import Path" python + $0 --batch imports.txt python +EOF +} + +main() { + if [ $# -eq 0 ]; then + usage + exit 0 + fi + + local term="" + local lang="" + local action="lookup" + local open_file=false + local extract=false + + while [ $# -gt 0 ]; do + case "$1" in + --open) + open_file=true + shift + ;; + --extract) + extract=true + shift + ;; + --import) + action="import" + shift + term="$1" + shift + ;; + --batch) + action="batch" + shift + term="$1" # This is the file + shift + ;; + --help|-h) + usage + exit 0 + ;; + python|cpp|c_cpp|c|js|javascript|ts|typescript|tsx|jsx|rust|go|shell|bash|all) + lang="$1" + shift + ;; + *) + if [ -z "$term" ]; then + term="$1" + fi + shift + ;; + esac + done + + # Normalize language + case "$lang" in + c) lang="cpp" ;; + javascript|js|typescript|ts|jsx|tsx) lang="js" ;; + bash) lang="shell" ;; + "") lang="all" ;; + esac + + case "$action" in + lookup) + if [ "$lang" = "all" ]; then + lookup_all "$term" + else + result=$(lookup_$lang "$term" 2>/dev/null) + if [ -n "$result" ]; then + local file desc + file=$(echo "$result" | cut -d'|' -f1) + desc=$(echo "$result" | cut -d'|' -f2) + + echo -e "${GREEN}Found:${NC} $desc" + echo -e "${BLUE}File:${NC} $file" + + if $extract; then + echo "" + echo -e "${YELLOW}--- Content ---${NC}" + extract_doc_content "$file" "$term" + fi + + if $open_file && [ -f "$file" ]; then + xdg-open "$file" 2>/dev/null & + fi + else + echo -e "${RED}Not found:${NC} $term in $lang documentation" + fi + fi + ;; + + import) + result=$(lookup_import "$term" "$lang") + if [ 
-n "$result" ]; then + echo -e "${GREEN}Import lookup:${NC} $term" + echo "$result" + else + echo -e "${RED}Could not parse import:${NC} $term" + fi + ;; + + batch) + if [ ! -f "$term" ]; then + echo "File not found: $term" + exit 1 + fi + + while IFS= read -r line || [ -n "$line" ]; do + [ -z "$line" ] && continue + [[ "$line" =~ ^# ]] && continue + + echo -e "${CYAN}Looking up:${NC} $line" + lookup_import "$line" "$lang" + echo "" + done < "$term" + ;; + esac +} + +main "$@" diff --git a/scripts/utils/repo_to_study.sh b/scripts/utils/repo_to_study.sh new file mode 100755 index 0000000..074b29f --- /dev/null +++ b/scripts/utils/repo_to_study.sh @@ -0,0 +1,364 @@ +#!/usr/bin/env bash +#============================================================================== +# repo_to_study.sh - Complete pipeline: Repo → Analysis → Offline Docs → Study Materials +# +# Usage: +# repo_to_study.sh +# +# Examples: +# repo_to_study.sh https://github.com/user/repo +# repo_to_study.sh /path/to/local/repo +# repo_to_study.sh . 
#
# Output:
#   Creates study materials in ~/.local/share/study-materials/<repo_name>/
#     - documentation_links.md (with offline doc paths)
#     - anki_cards.txt (importable to Anki)
#     - llm_anki_prompt.md (for generating more cards with AI)
#==============================================================================

set -euo pipefail

# Script directory for finding other tools
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ANALYZE_SCRIPT="$SCRIPT_DIR/analyze_repo.sh"
STUDY_SCRIPT="$SCRIPT_DIR/generate_study_materials.sh"
SETUP_DOCS_SCRIPT="$SCRIPT_DIR/setup_offline_docs.sh"

# Default output location (not in script dir, user's data dir)
STUDY_MATERIALS_BASE="$HOME/.local/share/study-materials"

# Work directories
# The scratch directory is created by mktemp so that its name cannot be
# predicted (and pre-created or symlinked) by another local user.
WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/repo_study.XXXXXX")"
OUTPUT_DIR=""

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'

#==============================================================================
# Helper Functions (all print to stderr to not interfere with return values)
#==============================================================================
# Print a boxed section title. $1 - title text.
print_header() {
    echo -e "\n${BOLD}${CYAN}════════════════════════════════════════════════════════════${NC}" >&2
    echo -e "${BOLD}${CYAN}  $1${NC}" >&2
    echo -e "${BOLD}${CYAN}════════════════════════════════════════════════════════════${NC}\n" >&2
}

# Announce the step that is about to run. $1 - message.
print_step() {
    echo -e "${BOLD}${BLUE}▶ $1${NC}" >&2
}

# Report a completed step. $1 - message.
print_success() {
    echo -e "${GREEN}✓ $1${NC}" >&2
}

# Report a failure. $1 - message.
print_error() {
    echo -e "${RED}✗ $1${NC}" >&2
}

# Print an informational hint. $1 - message.
print_info() {
    echo -e "${YELLOW}→ $1${NC}" >&2
}

# Remove the scratch directory; installed as the EXIT trap below.
cleanup() {
    if [ -d "$WORK_DIR" ] && [ "$WORK_DIR" != "/" ]; then
        rm -rf -- "${WORK_DIR:?}"
    fi
}

trap cleanup EXIT

# Print the help text and exit successfully.
usage() {
    cat << EOF
repo_to_study.sh - Generate study materials from any repository

USAGE:
    $(basename "$0") <repo_url_or_path> [output_dir]

ARGUMENTS:
    repo_url_or_path    Git URL (https/ssh) or local path to repository
    output_dir          Optional: where to save results
                        Default: ~/.local/share/study-materials/<repo_name>/

EXAMPLES:
    $(basename "$0") https://github.com/python/cpython
    $(basename "$0") git@github.com:torvalds/linux.git
    $(basename "$0") /home/user/my-project
    $(basename "$0") . ~/notes/my_study_notes

OUTPUT FILES:
    documentation_links.md  - Markdown with offline documentation links
    anki_cards.txt          - Tab-separated file for Anki import
    llm_anki_prompt.md      - Prompt template for AI-generated cards
    analysis/               - Raw analysis data (imports, keywords, functions)

EOF
    exit 0
}

#==============================================================================
# Check Dependencies
#==============================================================================
# Verify that the helper scripts are executable and the basic tools exist.
# Globals: ANALYZE_SCRIPT, STUDY_SCRIPT (read)
# Exits 1 after listing everything that is missing.
check_dependencies() {
    local missing=()
    local cmd dep

    # Check for required scripts
    if [ ! -x "$ANALYZE_SCRIPT" ]; then
        missing+=("analyze_repo.sh not found at $ANALYZE_SCRIPT")
    fi

    if [ ! -x "$STUDY_SCRIPT" ]; then
        missing+=("generate_study_materials.sh not found at $STUDY_SCRIPT")
    fi

    # Check for basic tools
    for cmd in git curl grep sed awk; do
        if ! command -v "$cmd" &>/dev/null; then
            missing+=("$cmd")
        fi
    done

    if [ ${#missing[@]} -gt 0 ]; then
        print_error "Missing dependencies:"
        for dep in "${missing[@]}"; do
            # Same stream as print_error so the list stays with its heading.
            echo "  - $dep" >&2
        done
        exit 1
    fi
}

#==============================================================================
# Ensure Offline Docs are Available
#==============================================================================
# Run the docs setup script for Python when no Python docs directory exists.
# Globals: SETUP_DOCS_SCRIPT (read)
ensure_offline_docs() {
    local docs_dir="$HOME/.local/share/offline-docs"

    if [ ! -d "$docs_dir/python" ]; then
        print_info "Offline docs not found. Setting up Python documentation..."
        if [ -x "$SETUP_DOCS_SCRIPT" ]; then
            "$SETUP_DOCS_SCRIPT" --python
        else
            print_info "Run setup_offline_docs.sh --all to enable offline documentation"
        fi
    fi
}

# Global to store repo name for cloned repos
REPO_NAME=""

#==============================================================================
# Get Repository
#==============================================================================
# Resolve the input to a local directory, cloning it first when it is a URL.
# Globals:   WORK_DIR (read), REPO_NAME (written; lost when the function is
#            called through command substitution)
# Arguments: $1 - git URL or local directory
# Outputs:   absolute repository path on stdout; progress on stderr
# Exits 1 when cloning fails or the input is neither URL nor directory.
get_repo() {
    local input="$1"
    local repo_dir=""

    # Check if it's a URL (git clone needed)
    if [[ "$input" =~ ^(https?|ssh|git):// ]] || [[ "$input" =~ ^git@ ]]; then
        print_step "Cloning repository..."

        # Extract repo name from URL
        REPO_NAME=$(basename "$input" .git)
        repo_dir="$WORK_DIR/$REPO_NAME"
        mkdir -p "$WORK_DIR"

        # Everything git prints goes to stderr; stdout carries only the path.
        if git clone --depth 1 "$input" "$repo_dir" >&2; then
            print_success "Cloned: $input"
        else
            print_error "Failed to clone repository"
            exit 1
        fi

        echo "$repo_dir"
    # Local path
    elif [ -d "$input" ]; then
        # Convert to absolute path
        repo_dir="$(cd "$input" && pwd)"
        REPO_NAME=$(basename "$repo_dir")
        print_success "Using local repository: $repo_dir"
        echo "$repo_dir"
    else
        print_error "Invalid input: '$input' is not a valid URL or directory"
        exit 1
    fi
}

#==============================================================================
# Analyze Repository
#==============================================================================
# Run the analyzer and locate its results directory.
# Globals:   ANALYZE_SCRIPT, REPO_NAME (read)
# Arguments: $1 - repository path
# Outputs:   results directory on stdout; analyzer output on stderr
# Exits 1 when no results directory with a per_language folder is found.
analyze_repo() {
    local repo_path="$1"
    local repo_name="$REPO_NAME"
    local results_dir

    if [ -z "$repo_name" ]; then
        repo_name=$(basename "$repo_path")
    fi

    print_step "Analyzing repository..."

    # Run the analyzer (it outputs to stderr/stdout, results go to
    # /tmp/repo_analysis/). Its status is ignored on purpose: success is
    # judged by the presence of the results directory below.
    "$ANALYZE_SCRIPT" "$repo_path" >&2 || true

    # Find the results directory
    results_dir="/tmp/repo_analysis/results_${repo_name}"
    if [ ! -d "$results_dir" ]; then
        # Try without prefix
        results_dir="/tmp/repo_analysis/results"
    fi

    if [ ! -d "$results_dir" ] || [ ! -d "$results_dir/per_language" ]; then
        print_error "Could not find analysis results at $results_dir"
        exit 1
    fi

    print_success "Analysis complete: $results_dir"
    echo "$results_dir"
}

#==============================================================================
# Generate Study Materials
#==============================================================================
# Run the study-material generator inside the analysis directory and copy
# whatever it produced, plus the raw analysis data, to the output directory.
# Globals:   STUDY_SCRIPT (read)
# Arguments: $1 - analysis directory, $2 - output directory
# Note:      changes the working directory to $1, as before; a relative $2 is
#            therefore resolved against $1.
generate_materials() {
    local analysis_dir="$1"
    local output_dir="$2"
    local name

    print_step "Generating study materials with offline documentation..."

    # Run study materials generator
    cd "$analysis_dir"
    if "$STUDY_SCRIPT" . 2>/dev/null | grep -E "^(Created|✓|Files created)" | head -5; then
        print_success "Study materials generated"
    else
        # Might have succeeded anyway; the copies below take what exists.
        print_info "Generator printed no summary; collecting whatever it produced"
    fi

    # Create output directory and copy results
    mkdir -p "$output_dir"

    # Copy generated files
    for name in documentation_links.md anki_cards.txt llm_anki_prompt.md; do
        if [ -f "$name" ]; then
            cp -- "$name" "$output_dir/"
        fi
    done

    # Copy analysis data
    mkdir -p "$output_dir/analysis"
    if [ -d "per_language" ]; then
        cp -r -- "per_language" "$output_dir/analysis/"
    fi
    for name in grep_imports.txt grep_keywords.txt grep_function_calls.txt; do
        if [ -f "$name" ]; then
            cp -- "$name" "$output_dir/analysis/"
        fi
    done

    print_success "Files saved to: $output_dir"
}

#==============================================================================
# Show Summary
#==============================================================================
# Print an overview of the generated files and a short preview.
# Arguments: $1 - output directory
# Outputs:   summary on stdout (the heading goes through print_header)
show_summary() {
    local output_dir="$1"
    local doc_lines card_count

    print_header "Study Materials Ready!"

    echo -e "${BOLD}Output directory:${NC} $output_dir"
    echo ""
    echo -e "${BOLD}Generated files:${NC}"

    if [ -f "$output_dir/documentation_links.md" ]; then
        doc_lines=$(wc -l < "$output_dir/documentation_links.md")
        echo -e "  📚 ${GREEN}documentation_links.md${NC} ($doc_lines lines)"
        echo "     Contains links to OFFLINE documentation"
    fi

    if [ -f "$output_dir/anki_cards.txt" ]; then
        # grep -c prints "0" AND exits 1 when nothing matches, so the fallback
        # must not print a second "0".
        card_count=$(grep -c '^\w' "$output_dir/anki_cards.txt" 2>/dev/null) || true
        card_count=${card_count:-0}
        echo -e "  🎴 ${GREEN}anki_cards.txt${NC} (~$card_count cards)"
        echo "     Import to Anki: File → Import → Tab separated"
    fi

    if [ -f "$output_dir/llm_anki_prompt.md" ]; then
        echo -e "  🤖 ${GREEN}llm_anki_prompt.md${NC}"
        echo "     Use with ChatGPT/Claude to generate more cards"
    fi

    if [ -d "$output_dir/analysis" ]; then
        echo -e "  📊 ${GREEN}analysis/${NC}"
        echo "     Raw analysis data (imports, keywords, functions per language)"
    fi

    echo ""
    echo -e "${BOLD}Quick preview of imports with offline docs:${NC}"
    if [ -f "$output_dir/documentation_links.md" ]; then
        # Table rows start with "| `". The pipe is left unescaped because
        # "\|" means alternation in GNU basic regular expressions. A missing
        # heading must not abort the script under errexit + pipefail.
        grep -A20 "import/from" "$output_dir/documentation_links.md" 2>/dev/null \
            | grep '^| `' \
            | head -5 \
            | sed 's/|/│/g' || true
    fi

    echo ""
    echo -e "${BOLD}Next steps:${NC}"
    echo "  1. Open documentation_links.md to browse offline docs"
    echo "  2. Import anki_cards.txt into Anki for spaced repetition"
    echo "  3. Use llm_anki_prompt.md to generate more targeted cards"
    echo ""
    echo -e "${CYAN}To view a doc:${NC} xdg-open 'file:///path/from/documentation_links.md'"
}

#==============================================================================
# Main
#==============================================================================
# Run the pipeline: get the repository, analyze it, generate the materials.
# Arguments: $1 - repository URL or path, $2 - optional output directory
main() {
    # Handle help
    if [ $# -lt 1 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
        usage
    fi

    local input="$1"
    local output_dir="${2:-}" # Will be set after we know repo name
    local repo_path analysis_dir

    print_header "Repo → Study Materials Pipeline"

    # Setup
    mkdir -p "$WORK_DIR"
    check_dependencies
    ensure_offline_docs

    # Step 1: Get repository
    print_header "Step 1/3: Getting Repository"
    repo_path=$(get_repo "$input")

    # Extract repo name from path (since get_repo runs in subshell, REPO_NAME is lost)
    if [ -z "$REPO_NAME" ]; then
        REPO_NAME=$(basename "$repo_path")
    fi

    # Set default output dir based on repo name
    if [ -z "$output_dir" ]; then
        output_dir="$STUDY_MATERIALS_BASE/$REPO_NAME"
    elif [[ "$output_dir" != /* ]]; then
        # Convert relative to absolute
        output_dir="$(pwd)/$output_dir"
    fi

    echo -e "${BOLD}Input:${NC}  $input" >&2
    echo -e "${BOLD}Output:${NC} $output_dir" >&2
    echo "" >&2

    # Step 2: Analyze
    print_header "Step 2/3: Analyzing Code"
    analysis_dir=$(analyze_repo "$repo_path")

    # Step 3: Generate materials
    print_header "Step 3/3: Generating Study Materials"
    generate_materials "$analysis_dir" "$output_dir"

    # Show results
    show_summary "$output_dir"
}

main "$@"
# multiple programming languages
#
# Usage: ./setup_offline_docs.sh [--all | --python | --c | --js | --rust | --go]
#
# Documentation is stored in: ~/.local/share/offline-docs/
#==============================================================================

set -e

# Configuration
DOCS_DIR="${OFFLINE_DOCS_DIR:-$HOME/.local/share/offline-docs}"
INDEX_DIR="$DOCS_DIR/.index"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Print a boxed section title. $1 - title text.
print_header() {
    echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
    echo -e "${GREEN}  $1${NC}"
    echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
}

# Print a progress message. $1 - message.
print_status() {
    echo -e "${YELLOW}→${NC} $1"
}

# Print a success message. $1 - message.
print_success() {
    echo -e "${GREEN}✓${NC} $1"
}

# Print an error message. $1 - message.
print_error() {
    echo -e "${RED}✗${NC} $1"
}

# Create directory structure
setup_dirs() {
    mkdir -p "$DOCS_DIR"/{python,c_cpp,javascript,typescript,rust,go,ruby,java,shell}
    mkdir -p "$INDEX_DIR"
}

#==============================================================================
# Python Documentation
# Source: https://docs.python.org/3/download.html
#==============================================================================
# Download and unpack the Python HTML documentation, then index it.
# Globals: DOCS_DIR (read)
# Returns: 0 on success and on a failed download (best effort, as before);
#          1 when the archive cannot be created or extracted
download_python_docs() {
    print_header "Python Documentation"
    local dest="$DOCS_DIR/python"
    # Download HTML documentation (most searchable)
    local url="https://www.python.org/ftp/python/doc/3.12.8/python-3.12.8-docs-html.tar.bz2"
    local archive

    # Check if already downloaded
    if [ -f "$dest/library/index.html" ]; then
        print_status "Python docs already present, checking for updates..."
    fi

    print_status "Downloading Python 3.12 documentation..."

    if ! archive=$(mktemp "${TMPDIR:-/tmp}/python-docs.XXXXXX"); then
        print_error "Could not create a temporary file"
        return 1
    fi

    # -f makes HTTP errors fail instead of saving the error page as archive.
    if ! curl -fsSL -o "$archive" "$url" 2>/dev/null; then
        rm -f -- "$archive"
        print_error "Failed to download Python docs"
        print_status "Alternative: Use 'python -m pydoc' for built-in docs"
        return 0
    fi

    print_status "Extracting..."
    mkdir -p "$dest"
    if ! tar -xjf "$archive" -C "$dest" --strip-components=1; then
        rm -f -- "$archive"
        print_error "Failed to extract Python docs into $dest"
        return 1
    fi
    rm -f -- "$archive"
    print_success "Python documentation installed to $dest"

    # Build index
    build_python_index
}

# Write "<term> <location>" lines for the Python docs to the index file.
# Globals: DOCS_DIR, INDEX_DIR (read)
build_python_index() {
    print_status "Building Python documentation index..."
    local dest="$DOCS_DIR/python"
    local index="$INDEX_DIR/python_index.txt"
    local page mod func

    mkdir -p "$INDEX_DIR"

    # Create searchable index: term -> file path
    {
        # Index library modules
        find "$dest/library" -name "*.html" 2>/dev/null | while IFS= read -r page; do
            mod=${page##*/}
            mod=${mod%.html}
            echo "$mod $dest/library/$mod.html"
        done

        # Index built-in functions from functions.html
        if [ -f "$dest/library/functions.html" ]; then
            grep -oP '(?<=id=")[^"]+' "$dest/library/functions.html" 2>/dev/null | while IFS= read -r func; do
                echo "$func $dest/library/functions.html#$func"
            done
        fi

        # Index from general index
        if [ -f "$dest/genindex.html" ]; then
            grep -oP 'href="([^"]+)"[^>]*>([^<]+)' "$dest/genindex.html" 2>/dev/null \
                | sed -E 's/href="([^"]+)"[^>]*>([^<]+)/\2 \1/' \
                | head -5000
        fi
    } | sort -u > "$index"

    print_success "Python index created with $(wc -l < "$index") entries"
}

#==============================================================================
# C/C++ Documentation (cppreference)
# Uses cppman tool which caches pages from cppreference.com
# Fallback: AUR cppreference package or direct download
#==============================================================================
# Make C/C++ reference docs available, trying cppman, an installed system
# package, the AUR package and finally a direct download, in that order.
# Globals: DOCS_DIR (read)
# Returns: 0 when docs are available, 1 when every method failed
download_cpp_docs() {
    print_header "C/C++ Documentation (cppreference)"
    local dest="$DOCS_DIR/c_cpp"
    local archive url
    local urls=(
        "https://upload.cppreference.com/mwiki/images/1/16/html_book_20241110.tar.xz"
        "https://github.com/nicovank/cppreference-doc/releases/latest/download/html_book.tar.xz"
    )

    # The cppman link counts as "present" too, otherwise every run would
    # re-cache all pages.
    if [ -f "$dest/en/index.html" ] || [ -d "$dest/reference" ] \
        || [ -L "$dest/system" ] || [ -L "$dest/cppman_cache" ]; then
        print_status "C/C++ docs already present"
        return 0
    fi

    mkdir -p "$dest"

    # Method 1: Use cppman if available (best - fetches and caches on demand)
    if command -v cppman &>/dev/null; then
        print_status "Found cppman, caching common C++ references..."
        # Each step is best effort; a failure must not abort the whole setup
        # through `set -e`.
        cppman -s cppreference.com 2>/dev/null || print_error "cppman: could not select cppreference.com"
        cppman -c 2>/dev/null || print_error "cppman: caching pages failed" # Cache all pages
        print_success "cppman configured - use 'cppman <term>' for lookups"
        print_status "Cppman cache at: ~/.cache/cppman/"
        ln -sfn ~/.cache/cppman "$dest/cppman_cache" 2>/dev/null || true
        build_cpp_index
        return 0
    fi

    # Method 2: Check if system package already installed
    if [ -d /usr/share/doc/cppreference/en ]; then
        print_status "Found system cppreference package"
        ln -sfn /usr/share/doc/cppreference "$dest/system"
        print_success "C/C++ documentation linked from system package"
        build_cpp_index
        return 0
    fi

    # Method 3: Try AUR package (Arch Linux)
    if command -v yay &>/dev/null; then
        print_status "Installing cppreference from AUR..."
        if yay -S --noconfirm cppreference 2>/dev/null; then
            # Link to installed docs (the package uses /en not /html)
            if [ -d /usr/share/doc/cppreference/en ]; then
                ln -sfn /usr/share/doc/cppreference "$dest/system"
                print_success "C/C++ documentation linked from system package"
                build_cpp_index
                return 0
            fi
        fi
    fi

    # Method 4: Direct download (try multiple mirrors)
    print_status "Downloading cppreference offline archive..."
    if ! archive=$(mktemp "${TMPDIR:-/tmp}/cppreference.XXXXXX"); then
        print_error "Could not create a temporary file"
        return 1
    fi

    for url in "${urls[@]}"; do
        print_status "Trying: $url"
        if curl -fL -o "$archive" "$url" 2>/dev/null; then
            print_status "Extracting (this may take a while)..."
            if tar -xJf "$archive" -C "$dest" 2>/dev/null; then
                rm -f -- "$archive"
                print_success "C/C++ documentation installed to $dest"
                build_cpp_index
                return 0
            fi
        fi
    done
    rm -f -- "$archive"

    print_error "Failed to download cppreference"
    print_status "Manual install: yay -S cppreference  OR  yay -S cppman"
    return 1
}

# Write "term|filepath" lines for every C/C++ HTML page to the index file
# (| as separator to handle spaces). Each page is listed under its file name
# and, when it lives below en/, under its path relative to en/.
# Globals: DOCS_DIR, INDEX_DIR (read)
build_cpp_index() {
    print_status "Building C/C++ documentation index..."
    local dest="$DOCS_DIR/c_cpp"
    local index="$INDEX_DIR/cpp_index.txt"
    local search_dir="$dest"
    local file term relpath

    # Resolve symlink if present
    if [ -L "$dest/system" ]; then
        search_dir="$dest/system"
    fi

    mkdir -p "$INDEX_DIR"

    {
        # -L is required: without it find does not descend into a start
        # point that is itself a symlink, which is exactly what
        # "$dest/system" is.
        find -L "$search_dir" -name "*.html" -type f 2>/dev/null | while IFS= read -r file; do
            # Extract meaningful term from path
            # (e.g., /en/cpp/container/vector.html -> vector)
            term=${file##*/}
            term=${term%.html}
            # Skip index files and overly generic names
            if [[ "$term" == "index" ]]; then
                continue
            fi
            echo "${term}|${file}"

            # Also index by path relative to en/ for better discoverability
            # (cpp/container/vector, c/stdlib/malloc)
            if [[ "$file" == "$search_dir/en/"* ]]; then
                relpath=${file#"$search_dir/en/"}
                echo "${relpath%.html}|${file}"
            fi
        done
    } | sort -u > "$index"

    print_success "C/C++ index created with $(wc -l < "$index") entries"
}

#==============================================================================
# JavaScript/MDN Documentation
# Clone the actual MDN content repository for full documentation
# https://github.com/mdn/content
#==============================================================================
# Fetch the web documentation part of the MDN content repository with a
# sparse checkout, link its sections below the javascript docs directory and
# build the index.
# Globals: DOCS_DIR (read)
# Returns: 0 on success; build_js_index's status when it is the last step
download_js_docs() {
    print_header "JavaScript/MDN Documentation"
    local dest="$DOCS_DIR/javascript"
    local mdn_repo="$DOCS_DIR/mdn-content"
    local web_root="$mdn_repo/files/en-us/web"
    local doc_count

    # Check if already cloned
    if [ -d "$web_root/javascript" ]; then
        print_status "MDN content already present"
        build_js_index
        return 0
    fi

    print_status "Cloning MDN content repository (sparse checkout for web docs)..."
    print_status "This may take a few minutes on first run..."

    mkdir -p "$mdn_repo"

    # git -C keeps the caller's working directory untouched.
    # Initialize sparse checkout to only get what we need
    if [ ! -d "$mdn_repo/.git" ]; then
        git -C "$mdn_repo" init
        git -C "$mdn_repo" remote add origin https://github.com/mdn/content.git
        git -C "$mdn_repo" config core.sparseCheckout true

        # Only checkout web-related documentation (JS, HTML, CSS, Web APIs)
        mkdir -p "$mdn_repo/.git/info"
        cat > "$mdn_repo/.git/info/sparse-checkout" << 'SPARSE'
/files/en-us/web/javascript/
/files/en-us/web/api/
/files/en-us/web/html/
/files/en-us/web/css/
/files/en-us/glossary/
SPARSE

        print_status "Fetching MDN content (JavaScript, HTML, CSS, Web APIs)..."
        git -C "$mdn_repo" fetch --depth 1 origin main
        git -C "$mdn_repo" checkout main
    else
        print_status "Updating MDN content..."
        git -C "$mdn_repo" pull --depth 1 origin main 2>/dev/null || true
    fi

    # Create symlinks for easier access. -n replaces an existing link instead
    # of following it and dropping a new link inside the linked directory.
    mkdir -p "$dest"
    ln -sfn "$web_root/javascript" "$dest/javascript"
    ln -sfn "$web_root/api" "$dest/web-api"
    ln -sfn "$web_root/html" "$dest/html"
    ln -sfn "$web_root/css" "$dest/css"
    ln -sfn "$mdn_repo/files/en-us/glossary" "$dest/glossary"

    build_js_index
    print_success "MDN offline documentation ready"

    doc_count=$(find "$mdn_repo/files" -name "index.md" 2>/dev/null | wc -l)
    print_status "Downloaded $doc_count documentation pages"
}

# Print the value of the first "title:" line of a file, without double
# quotes. $1 - markdown file. Prints nothing when there is no such line.
_mdn_title() {
    grep -m1 "^title:" "$1" 2>/dev/null | sed 's/^title:\s*//' | tr -d '"'
}

# Print "term|file|title" for every index.md below a directory, where term is
# the name of the directory holding the file and title falls back to term.
# Arguments: $1 - directory to scan; remaining arguments are passed to find
#            ahead of the -name test (e.g. -maxdepth 2)
_mdn_index_tree() {
    local root="$1"
    shift
    local file dir term title

    find "$root" "$@" -name "index.md" 2>/dev/null | while IFS= read -r file; do
        dir=${file%/*}
        term=${dir##*/}
        title=$(_mdn_title "$file")
        echo "${term}|${file}|${title:-$term}"
    done
}

# Build the MDN index: one "term|file|title" line per term, sorted by term.
# Globals: DOCS_DIR, INDEX_DIR (read)
# Returns: 1 when the MDN checkout is missing
build_js_index() {
    print_status "Building MDN documentation index..."
    local mdn_repo="$DOCS_DIR/mdn-content"
    local index="$INDEX_DIR/js_index.txt"
    local en_root="$mdn_repo/files/en-us"
    local file dir term count

    if [ ! -d "$mdn_repo/files" ]; then
        print_error "MDN content not found"
        return 1
    fi

    mkdir -p "$INDEX_DIR"

    # Build comprehensive index from MDN markdown files
    {
        # Index JavaScript reference
        _mdn_index_tree "$en_root/web/javascript/reference"

        # Index Web APIs
        _mdn_index_tree "$en_root/web/api"

        # Index HTML elements (fixed description instead of the page title)
        find "$en_root/web/html/element" -name "index.md" 2>/dev/null | while IFS= read -r file; do
            dir=${file%/*}
            term=${dir##*/}
            echo "${term}|${file}|HTML <${term}> element"
        done

        # Index CSS properties
        _mdn_index_tree "$en_root/web/css" -maxdepth 2

        # Index Glossary
        _mdn_index_tree "$en_root/glossary"
    } | sort -t'|' -k1,1 -u > "$index"

    count=$(wc -l < "$index")
    print_success "MDN index created with $count entries"
}

#==============================================================================
# Rust Documentation (via rustup)
#==============================================================================
# Print the directory that holds the file reported by `rustup doc --path`.
# Prints nothing when rustup reports no path.
_rust_doc_root() {
    local index_page

    index_page=$(rustup doc --path 2>/dev/null | head -n 1) || true
    [ -n "$index_page" ] || return 0
    dirname -- "$index_page"
}

# Link the rustup-provided documentation below the rust docs directory and
# index it.
# Globals: DOCS_DIR (read)
download_rust_docs() {
    print_header "Rust Documentation"
    local dest="$DOCS_DIR/rust"
    local rust_doc_path

    if ! command -v rustup &>/dev/null; then
        print_status "Rust not installed. Install with: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh"
        return 0
    fi

    print_status "Rust docs available via 'rustup doc'"

    # Get the rust doc path. The guard matters under `set -e`: no reported
    # path must not abort the whole setup.
    rust_doc_path=$(_rust_doc_root) || true

    if [ -n "$rust_doc_path" ] && [ -d "$rust_doc_path" ]; then
        mkdir -p "$dest"
        # -n replaces an existing link instead of creating one inside it.
        ln -sfn "$rust_doc_path" "$dest/std"
        print_success "Linked Rust std docs from $rust_doc_path"
        build_rust_index
    fi
    return 0
}

# Write the names (without .html) of up to 2000 std doc pages to the index.
# Globals: INDEX_DIR (read)
build_rust_index() {
    print_status "Building Rust documentation index..."
    local index="$INDEX_DIR/rust_index.txt"
    local rust_doc_path="" file

    if command -v rustup &>/dev/null; then
        rust_doc_path=$(_rust_doc_root) || true
    fi

    if [ -z "$rust_doc_path" ] || [ ! -d "$rust_doc_path/std" ]; then
        print_status "Rust std docs not found; index not written"
        return 0
    fi

    mkdir -p "$INDEX_DIR"
    find "$rust_doc_path/std" -name "*.html" 2>/dev/null | head -n 2000 | while IFS= read -r file; do
        file=${file##*/}
        echo "${file%.html}"
    done | sort -u > "$index"

    print_success "Rust index created"
}

#==============================================================================
# Go Documentation
#==============================================================================
# Record the list of Go standard library packages and index it.
# Globals: DOCS_DIR (read)
download_go_docs() {
    print_header "Go Documentation"
    local dest="$DOCS_DIR/go"

    if ! command -v go &>/dev/null; then
        print_status "Go not installed"
        return 0
    fi

    print_status "Go docs available via 'go doc'"

    # Create a reference of standard library packages
    mkdir -p "$dest"
    if go list std > "$dest/stdlib_packages.txt" 2>/dev/null; then
        print_success "Go stdlib package list created"
        build_go_index
    else
        print_error "'go list std' failed; Go package list not created"
    fi
    return 0
}

# Copy the stdlib package list to the index directory.
# Globals: DOCS_DIR, INDEX_DIR (read)
build_go_index() {
    print_status "Building Go documentation index..."
    local dest="$DOCS_DIR/go"
    local index="$INDEX_DIR/go_index.txt"

    if [ ! -f "$dest/stdlib_packages.txt" ]; then
        print_status "No Go package list at $dest/stdlib_packages.txt; index not written"
        return 0
    fi

    mkdir -p "$INDEX_DIR"
    cp -- "$dest/stdlib_packages.txt" "$index"
    print_success "Go index created"
}

#==============================================================================
# Shell/Bash Documentation (man pages + built-in help)
#==============================================================================
# Generate the bash builtin reference and the quick reference list, then
# build the shell index.
# Globals: DOCS_DIR (read)
download_shell_docs() {
    print_header "Shell/Bash Documentation"
    local dest="$DOCS_DIR/shell"
    local name
    mkdir -p "$dest"

    print_status "Extracting bash built-in help..."

    # Extract help for all bash builtins
    {
        echo "# Bash Built-in Commands Reference"
        echo "# Generated from 'help' command"
        echo ""

        # Get list of builtins
        compgen -b 2>/dev/null | while IFS= read -r name; do
            echo "=== $name ==="
            help "$name" 2>/dev/null || echo "No help available"
            echo ""
        done
    } > "$dest/bash_builtins.txt"

    # Create quick reference for common commands
    cat > "$dest/common_commands.txt" << 'SHELLREF'
# Common Shell Commands Quick Reference

## File Operations
ls - List directory contents
cd - Change directory
pwd - Print working directory
cp - Copy files
mv - Move/rename files
rm - Remove files
mkdir - Create directory
rmdir - Remove empty directory
touch - Create empty file / update timestamp
cat - Concatenate and display files
head - Display first lines
tail - Display last lines
less - Page through file
find - Search for files
locate - Find files by name (uses database)

## Text Processing
grep - Search text patterns
sed - Stream editor
awk - Pattern scanning and processing
cut - Remove sections from lines
sort - Sort lines
uniq - Report or omit repeated lines
wc - Word, line, character count
tr - Translate characters
diff - Compare files

## Process Management
ps - Report process status
top - Display processes
kill - Send signal to process
pkill - Kill processes by name
bg - Background a process
fg - Foreground a process
jobs - List background jobs
nohup - Run immune to hangups

## Networking
curl - Transfer data from URL
wget - Download files
ssh - Secure shell
scp - Secure copy
rsync - Remote sync
ping - Test connectivity
netstat - Network statistics
ss - Socket statistics

## Archives
tar - Tape archive
gzip - Compress files
gunzip - Decompress files
zip - Package and compress
unzip - Extract zip archives

## Permissions
chmod - Change file permissions
chown - Change file owner
chgrp - Change file group

## Disk
df - Disk free space
du - Disk usage
mount - Mount filesystem
umount - Unmount filesystem

## System
uname - System information
hostname - Show/set hostname
uptime - System uptime
free - Memory usage
date - Display/set date
cal - Display calendar

## Bash Builtins
echo - Display text
printf - Formatted output
read - Read input
export - Set environment variable
source - Execute script in current shell
alias - Create command alias
type - Display command type
which - Locate command
declare - Declare variables
local - Local variable
set - Set shell options
shopt - Shell options
trap - Trap signals
eval - Evaluate arguments
exec - Execute command
SHELLREF

    print_success "Shell documentation created"
    build_shell_index
}

# Write "<command> <location>" lines to the shell index: every bash builtin
# points at the builtin reference, a fixed list of common commands at the
# path printed by `man -w`.
# Globals: DOCS_DIR, INDEX_DIR (read)
build_shell_index() {
    print_status "Building Shell documentation index..."
    local dest="$DOCS_DIR/shell"
    local index="$INDEX_DIR/shell_index.txt"
    local cmd man_path

    mkdir -p "$INDEX_DIR"

    {
        # Bash builtins
        compgen -b 2>/dev/null | while IFS= read -r cmd; do
            echo "$cmd $dest/bash_builtins.txt"
        done

        # Common commands from man pages
        for cmd in ls cd cp mv rm mkdir cat grep sed awk find sort curl wget tar chmod; do
            # A command without a man page (builtins such as cd) makes
            # `man -w` fail. Without the guard, `set -e` would end this
            # subshell there and silently drop every later command.
            man_path=$(man -w "$cmd" 2>/dev/null) || continue
            if [ -n "$man_path" ]; then
                echo "$cmd $man_path"
            fi
        done
    } | sort -u > "$index"

    print_success "Shell index created"
}

#==============================================================================
# Zeal Docsets (cross-platform dash alternative)
#==============================================================================
# Tell the user how to get Zeal, or which docsets to download when it is
# already installed. Nothing is downloaded here.
setup_zeal_docsets() {
    print_header "Zeal Docsets (Optional)"

    if ! command -v zeal &>/dev/null; then
        print_status "Zeal not installed."
        print_status "Install with: pacman -S zeal (or your package manager)"
        print_status "Zeal provides a GUI for offline documentation"
        return 0
    fi

    print_status "Zeal is installed. You can download docsets from within Zeal."
    print_status "Recommended docsets: Python 3, JavaScript, TypeScript, C, C++"
}

+ +Options: + --all Download all available documentation + --python Download Python documentation + --cpp, --c Download C/C++ documentation (cppreference) + --js Download JavaScript documentation + --rust Download/link Rust documentation + --go Download/link Go documentation + --shell Generate Shell/Bash documentation + --zeal Setup Zeal docsets info + --status Show what's installed + --help Show this help + +Documentation is stored in: $DOCS_DIR + +Examples: + $0 --all # Download everything + $0 --python --cpp # Download Python and C++ docs + $0 --status # Check what's installed +EOF +} + +show_status() { + print_header "Offline Documentation Status" + echo "Documentation directory: $DOCS_DIR" + echo "" + + for lang in python c_cpp javascript rust go shell; do + dir="$DOCS_DIR/$lang" + if [ -d "$dir" ] && [ "$(ls -A "$dir" 2>/dev/null)" ]; then + size=$(du -sh "$dir" 2>/dev/null | cut -f1) + print_success "$lang: installed ($size)" + else + print_error "$lang: not installed" + fi + done + + echo "" + echo "Index files:" + ls -la "$INDEX_DIR"/*.txt 2>/dev/null || echo "No indexes built yet" +} + +main() { + setup_dirs + + if [ $# -eq 0 ]; then + usage + exit 0 + fi + + while [ $# -gt 0 ]; do + case "$1" in + --all) + download_python_docs + download_cpp_docs + download_js_docs + download_rust_docs + download_go_docs + download_shell_docs + setup_zeal_docsets + ;; + --python) + download_python_docs + ;; + --cpp|--c|--c++) + download_cpp_docs + ;; + --js|--javascript) + download_js_docs + ;; + --rust) + download_rust_docs + ;; + --go) + download_go_docs + ;; + --shell|--bash) + download_shell_docs + ;; + --zeal) + setup_zeal_docsets + ;; + --status) + show_status + ;; + --help|-h) + usage + exit 0 + ;; + *) + print_error "Unknown option: $1" + usage + exit 1 + ;; + esac + shift + done + + echo "" + print_header "Setup Complete" + echo "Documentation stored in: $DOCS_DIR" + echo "" + echo "Use 'lookup_docs.sh [language]' to search documentation" +} + +main "$@"