feat: sick mode

This commit is contained in:
Krzysztof kuhy Rudnicki 2026-01-07 17:03:07 +01:00
parent 206ac437e8
commit c72ddb6ddb
9 changed files with 4110 additions and 0 deletions

12
.gitignore vendored
View File

@ -3,3 +3,15 @@ scripts/features/.nextcloud_raspberry.conf
scripts/features/.raspberry_pi.conf scripts/features/.raspberry_pi.conf
.nextcloud_raspberry.conf .nextcloud_raspberry.conf
.raspberry_pi.conf .raspberry_pi.conf
# Generated study materials (repo_to_study.sh output)
study_materials/
**/study_materials/
documentation_links.md
anki_cards.txt
llm_anki_prompt.md
# Repo analysis temp files
/tmp/repo_analysis/
*.cscope.out*
tags

View File

@ -53,3 +53,4 @@ netsurf
amfora amfora
tartube tartube
youtube youtube
virtualbox

224
scripts/fixes/fix_anki.sh Executable file
View File

@ -0,0 +1,224 @@
#!/usr/bin/env bash
# Fix Anki startup issues caused by Python version mismatch or aqt namespace conflict
#
# Common causes addressed:
# - anki-git built for older Python version (e.g., 3.13) while system runs newer (e.g., 3.14)
# - python-aqtinstall package conflicts with Anki's aqt module (same namespace)
#
# Usage:
# ./fix_anki.sh # Auto-fix (rebuild anki-git)
# ./fix_anki.sh --check # Only check for issues, don't fix
# Strict mode: exit on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Directory containing this script, with symlinks resolved by readlink -f.
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
# shellcheck source=../lib/common.sh
source "$SCRIPT_DIR/../lib/common.sh"
# Colors for output (escape sequences are expanded by `echo -e` in the log_* helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Switched to true by --check in main: report problems but do not apply fixes.
CHECK_ONLY=false
# Print the command-line help text to stdout, one line per printf argument.
# The program name shown is the basename of $0.
usage() {
  printf '%s\n' \
    "fix_anki.sh - Fix Anki startup issues" \
    "Usage: $(basename "$0") [OPTIONS]" \
    "Options:" \
    "--check Only check for issues, don't apply fixes" \
    "-h, --help Show this help message" \
    "Common issues fixed:" \
    "- Python version mismatch (anki built for older Python)" \
    "- aqt namespace conflict with python-aqtinstall"
}
# Shared formatter for the log_* helpers below.
#   $1 - color escape sequence, $2 - level tag, remaining args - message words
# Writes "<color>[TAG]<NC> message" to stdout; backslash escapes are expanded
# the same way `echo -e` expands them.
_log_line() {
  local color="$1" tag="$2"
  shift 2
  printf '%b\n' "${color}[${tag}]${NC} $*"
}

# Leveled log helpers; each prints its arguments as a single message line.
log_info() {
  _log_line "$BLUE" "INFO" "$@"
}

log_warn() {
  _log_line "$YELLOW" "WARN" "$@"
}

log_error() {
  _log_line "$RED" "ERROR" "$@"
}

log_success() {
  _log_line "$GREEN" "OK" "$@"
}
# Print the name of the installed Anki package, probing pacman for anki-git,
# anki and anki-bin in that order. Prints an empty line when none is installed.
check_anki_installed() {
  local candidate
  for candidate in anki-git anki anki-bin; do
    if pacman -Qi "$candidate" &>/dev/null; then
      echo "$candidate"
      return 0
    fi
  done
  echo ""
}
# Print the "major.minor" version of the interpreter that `python` resolves to.
get_system_python_version() {
  python -c 'import sys; print("%d.%d" % sys.version_info[:2])'
}
# Print the Python "major.minor" version a package was installed for, taken
# from the first /usr/lib/pythonX.Y path in its pacman file list.
#   $1 - package name
# Prints an empty line when no such path is listed or pacman fails.
get_anki_python_version() {
  local anki_pkg="$1"
  local path_re='/usr/lib/python([0-9]+\.[0-9]+)'
  local entry detected=""
  while IFS= read -r entry; do
    if [[ "$entry" =~ $path_re ]]; then
      detected="${BASH_REMATCH[1]}"
      break
    fi
  done < <(pacman -Ql "$anki_pkg" 2>/dev/null)
  echo "$detected"
}
# Report which project the site-packages "aqt" module looks like it belongs to.
#   $1 - Python "major.minor" version whose site-packages directory is inspected
# Prints one of:
#   none        - aqt/__init__.py does not exist
#   aqtinstall  - the file mentions "aqtinstall"
#   anki        - the file mentions "anki" (checked after aqtinstall)
#   unknown     - the file exists but mentions neither
check_aqt_conflict() {
  local sys_python="$1"
  local init_file="/usr/lib/python${sys_python}/site-packages/aqt/__init__.py"

  if [[ ! -f "$init_file" ]]; then
    echo "none"
    return 0
  fi

  local marker
  for marker in aqtinstall anki; do
    if grep -q "$marker" "$init_file" 2>/dev/null; then
      echo "$marker"
      return 0
    fi
  done
  echo "unknown"
}
#######################################
# Diagnose an Anki installation and, unless --check was given, repair it.
# Globals:
#   CHECK_ONLY (written when --check is passed, then read)
# Arguments:
#   --check       only report problems; exit 1 if any were found, 0 otherwise
#   -h, --help    print usage and exit 0
# Outputs:
#   progress and diagnostics via the log_* helpers
# Returns / exits:
#   0 when Anki imports cleanly (before or after the fix)
#   1 on unknown option, missing Anki, missing AUR helper, detected issues in
#     --check mode, or when the import still fails after the fix
#######################################
main() {
  # Parse arguments
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --check)
        CHECK_ONLY=true
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        usage
        exit 1
        ;;
    esac
  done

  log_info "Checking Anki installation..."

  # Check which Anki package is installed
  local anki_pkg
  anki_pkg=$(check_anki_installed)
  if [[ -z "$anki_pkg" ]]; then
    log_error "Anki is not installed"
    exit 1
  fi
  log_info "Found Anki package: $anki_pkg"

  # Get Python versions
  local sys_python anki_python
  sys_python=$(get_system_python_version)
  anki_python=$(get_anki_python_version "$anki_pkg")
  log_info "System Python version: $sys_python"
  log_info "Anki built for Python: ${anki_python:-unknown}"

  local issues_found=false

  # Check for Python version mismatch
  if [[ -n "$anki_python" && "$sys_python" != "$anki_python" ]]; then
    log_warn "Python version mismatch detected!"
    log_warn " Anki was built for Python $anki_python but system runs Python $sys_python"
    issues_found=true
  fi

  # Check for aqt namespace conflict
  local aqt_owner
  aqt_owner=$(check_aqt_conflict "$sys_python")
  case "$aqt_owner" in
    aqtinstall)
      log_warn "aqt namespace conflict detected!"
      log_warn " python-aqtinstall owns /usr/lib/python${sys_python}/site-packages/aqt/"
      log_warn " This conflicts with Anki's aqt module"
      issues_found=true
      ;;
    anki)
      log_success "aqt module belongs to Anki (correct)"
      ;;
    none)
      if [[ "$sys_python" != "$anki_python" ]]; then
        log_warn "No aqt module found for Python $sys_python"
      fi
      ;;
    *)
      log_warn "Unknown aqt module owner"
      ;;
  esac

  # Test if Anki actually works
  log_info "Testing Anki startup..."
  if python -c "from aqt import run" 2>/dev/null; then
    log_success "Anki imports work correctly"
    if [[ "$issues_found" == "false" ]]; then
      log_success "No issues found with Anki installation"
      exit 0
    fi
  else
    log_error "Anki import test failed"
    issues_found=true
  fi

  if [[ "$CHECK_ONLY" == "true" ]]; then
    if [[ "$issues_found" == "true" ]]; then
      echo ""
      log_info "Issues detected. Run without --check to fix."
      exit 1
    fi
    exit 0
  fi

  # Apply fixes
  echo ""
  log_info "Applying fixes..."

  # Check if python-aqtinstall is installed and remove it if nothing depends on it
  if pacman -Qi python-aqtinstall &>/dev/null; then
    local required_by
    # LC_ALL=C pins pacman's field labels to English; with a localized pacman
    # the "Required By" line would never match and, under pipefail + errexit,
    # the failed grep would terminate the script without any message.
    required_by=$(LC_ALL=C pacman -Qi python-aqtinstall | grep "Required By" | cut -d: -f2 | xargs) \
      || required_by=""
    if [[ "$required_by" == "None" ]]; then
      log_info "Removing python-aqtinstall (conflicts with Anki)..."
      sudo pacman -R --noconfirm python-aqtinstall
    else
      log_warn "python-aqtinstall is required by: $required_by"
      log_warn "Cannot remove automatically. You may need to resolve this manually."
    fi
  fi

  # Rebuild anki package
  if [[ "$anki_pkg" == "anki-git" ]]; then
    # anki-git is rebuilt through an AUR helper; prefer yay, fall back to paru.
    local aur_helper=""
    if command -v yay &>/dev/null; then
      aur_helper="yay"
    elif command -v paru &>/dev/null; then
      aur_helper="paru"
    fi
    if [[ -z "$aur_helper" ]]; then
      log_error "No AUR helper (yay or paru) found; cannot rebuild anki-git"
      exit 1
    fi
    log_info "Rebuilding anki-git for Python $sys_python..."
    "$aur_helper" -S anki-git --rebuild --noconfirm
  elif [[ "$anki_pkg" == "anki" ]]; then
    log_info "Reinstalling anki..."
    sudo pacman -S anki --noconfirm
  else
    log_warn "Package $anki_pkg may need manual rebuild"
  fi

  # Verify fix
  echo ""
  log_info "Verifying fix..."
  if python -c "from aqt import run" 2>/dev/null; then
    log_success "Anki is now working!"
    echo ""
    echo "You can start Anki with: anki"
  else
    log_error "Fix may not have worked. Please check manually."
    exit 1
  fi
}
# Script entry point: forward every command-line argument to main.
main "$@"

1
scripts/fixes/stremio Submodule

@ -0,0 +1 @@
Subproject commit 4c3c9996956221f0cae49f69e0597e33aee33ee1

866
scripts/utils/analyze_repo.sh Executable file
View File

@ -0,0 +1,866 @@
#!/bin/bash
# Analyze a git repository for most-used keywords, functions, etc.
# Usage: ./analyze_repo.sh [repo_url_or_local_path] [output_dir] [--no-ignore]
#
# Examples:
# ./analyze_repo.sh https://github.com/torvalds/linux # Clone from URL
# ./analyze_repo.sh /path/to/local/repo # Use local directory
# ./analyze_repo.sh . # Analyze current directory
# ./analyze_repo.sh . /tmp/out --no-ignore # Include node_modules, etc.
# Abort on the first command that fails outside a conditional context.
set -e

# Parse arguments: --no-ignore may appear anywhere; the first other argument
# is the repository (URL or path), the second the working directory. Any
# further positional arguments are ignored.
RESPECT_GITIGNORE=true
INPUT=""
WORK_DIR=""
for arg in "$@"; do
  if [[ "$arg" == "--no-ignore" ]]; then
    RESPECT_GITIGNORE=false
  elif [[ -z "$INPUT" ]]; then
    INPUT="$arg"
  elif [[ -z "$WORK_DIR" ]]; then
    WORK_DIR="$arg"
  fi
done

# Apply defaults for anything that was not supplied (or was supplied empty).
: "${INPUT:=https://github.com/torvalds/linux}"
: "${WORK_DIR:=/tmp/repo_analysis}"
TOP_N=50 # Number of top results to show

# Directories to exclude (unless --no-ignore is used)
EXCLUDE_DIRS="node_modules|\.git|vendor|\.venv|venv|__pycache__|\.cache|build|dist|\.next|\.nuxt|target|\.tox|\.eggs"
# Detect if input is a URL or local path.
# Succeeds when $1 starts with http://, https://, git@ or ssh://.
is_url() {
  case "$1" in
    http://* | https://* | git@* | ssh://*)
      return 0
      ;;
    *)
      return 1
      ;;
  esac
}
# Resolve INPUT into REPO_NAME and REPO_DIR. IS_LOCAL records whether INPUT
# was an existing local directory (true) or a URL (false); for a URL the
# repository directory is placed under WORK_DIR and REPO_URL is set.
IS_LOCAL=false
if ! is_url "$INPUT"; then
  # Local path - resolve to absolute path
  IS_LOCAL=true
  if [ ! -d "$INPUT" ]; then
    echo "Error: '$INPUT' is not a valid directory or URL"
    exit 1
  fi
  REPO_DIR=$(cd "$INPUT" && pwd)
  REPO_NAME=$(basename "$REPO_DIR")
else
  REPO_URL="$INPUT"
  REPO_NAME=$(basename "$REPO_URL" .git)
  REPO_DIR="$WORK_DIR/$REPO_NAME"
fi

# All analysis output for this repository is written below this directory.
RESULTS_DIR="$WORK_DIR/results_${REPO_NAME}"

# Colors for output (escape sequences, expanded later by `echo -e`)
NC='\033[0m' # No Color
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
# Print a banner: a blank line, the title ($1) in green between two blue
# rules, and a trailing blank line. Backslash escapes in the title are
# expanded the same way `echo -e` expands them.
print_header() {
  local rule="════════════════════════════════════════════════════════════"
  printf '\n%b\n%b\n%b\n\n' \
    "${BLUE}${rule}${NC}" \
    "${GREEN} $1${NC}" \
    "${BLUE}${rule}${NC}"
}
# Print a yellow "--- title ---" line for $1, surrounded by blank lines.
# Backslash escapes in the title are expanded the same way `echo -e` does.
print_subheader() {
  printf '\n%b\n\n' "${YELLOW}--- $1 ---${NC}"
}
# Succeed when `git rev-parse --is-inside-work-tree` succeeds in the current
# directory. All git output is discarded; git's exit status is returned as-is.
is_git_repo() {
  git rev-parse --is-inside-work-tree >/dev/null 2>&1
}
#######################################
# List files below the current directory whose names match any of the given
# glob patterns, one path per line.
# Usage: find_files "*.c"   or   find_files "*.py" "*.pyx"
# Globals:
#   RESPECT_GITIGNORE (read), EXCLUDE_DIRS (read)
# Arguments:
#   one or more name globs; with none, every file is listed
# Outputs:
#   - RESPECT_GITIGNORE=true inside a git work tree: tracked files plus
#     untracked-but-not-ignored files, de-duplicated, as git reports them
#     (no leading "./")
#   - RESPECT_GITIGNORE=true elsewhere: find output ("./..." paths) minus
#     anything under a directory named in EXCLUDE_DIRS
#   - otherwise: unfiltered find output
#######################################
find_files() {
  local patterns=("$@")

  if [ "$RESPECT_GITIGNORE" = true ] && is_git_repo; then
    # git ls-files honours .gitignore. -z makes git print paths verbatim
    # instead of C-quoting non-ASCII or special characters (which would yield
    # names that do not exist on disk); the NULs are turned back into newlines
    # for the line-oriented consumers of this function.
    {
      git ls-files -z -- "${patterns[@]}" 2>/dev/null
      git ls-files -z --others --exclude-standard -- "${patterns[@]}" 2>/dev/null
    } | tr '\0' '\n' | sort -u
    return
  fi

  # Build "-name P1 -o -name P2 ..." once for both find-based modes.
  local name_expr=() pat
  for pat in "${patterns[@]}"; do
    if [ "${#name_expr[@]}" -gt 0 ]; then
      name_expr+=(-o)
    fi
    name_expr+=(-name "$pat")
  done

  local find_cmd=(find . -type f)
  # An empty "\( \)" group is a find syntax error, so only add it when needed.
  if [ "${#name_expr[@]}" -gt 0 ]; then
    find_cmd+=(\( "${name_expr[@]}" \))
  fi

  if [ "$RESPECT_GITIGNORE" = true ]; then
    # Not a git repo - fall back to manual exclusion. grep exits 1 when it
    # filters everything out; that is not an error for callers.
    "${find_cmd[@]}" 2>/dev/null | grep -Ev "/($EXCLUDE_DIRS)/" || true
  else
    # No filtering - find all files
    "${find_cmd[@]}" 2>/dev/null
  fi
}
# Count files matching pattern (respecting exclusions).
# Arguments are passed straight through to find_files; prints the number of
# lines it produced.
count_files() {
  local total
  total=$(find_files "$@" | wc -l)
  echo "$total"
}
#==============================================================================
# STEP 0: Install Missing Tools
#==============================================================================
#######################################
# Detect missing analysis tools and install them with the system's package
# manager (pacman + AUR helper, apt-get, dnf or brew).
# Globals:
#   GREEN, YELLOW, RED, NC (read, for colored messages)
# Arguments:
#   none
# Outputs:
#   progress messages on stdout
# Returns:
#   0 when nothing is missing or installation completed; exits 1 when no
#   supported package manager is found. All working variables are local, so
#   nothing leaks into the caller's scope.
#######################################
install_missing_tools() {
  local missing_tools=()
  local missing_aur=()
  local tool aur_tool

  # Check for required tools
  for tool in git ctags cscope clang ugrep; do
    command -v "$tool" &>/dev/null || missing_tools+=("$tool")
  done

  # Check for AUR tools
  for tool in tokei scc; do
    command -v "$tool" &>/dev/null || missing_aur+=("$tool")
  done

  # Check for Rust 'counts' tool (install via cargo if missing)
  if ! command -v counts &>/dev/null; then
    if command -v cargo &>/dev/null; then
      echo "Installing 'counts' via cargo (fast word counter)..."
      cargo install counts 2>/dev/null || echo "Warning: counts install failed, will use Python fallback"
    fi
  fi

  # If nothing is missing, return
  if [ "${#missing_tools[@]}" -eq 0 ] && [ "${#missing_aur[@]}" -eq 0 ]; then
    echo -e "${GREEN}All required tools are installed.${NC}"
    return 0
  fi

  echo -e "${YELLOW}Missing tools detected. Installing...${NC}"

  # Detect package manager
  if command -v pacman &>/dev/null; then
    # Arch Linux
    if [ "${#missing_tools[@]}" -gt 0 ]; then
      echo "Installing from official repos: ${missing_tools[*]}"
      sudo pacman -S --needed --noconfirm "${missing_tools[@]}"
    fi
    if [ "${#missing_aur[@]}" -gt 0 ]; then
      # Find or install AUR helper
      local aur_helper
      if command -v yay &>/dev/null; then
        aur_helper="yay"
      elif command -v paru &>/dev/null; then
        aur_helper="paru"
      else
        echo "No AUR helper found. Installing yay..."
        sudo pacman -S --needed --noconfirm base-devel git
        local temp_dir
        temp_dir=$(mktemp -d)
        git clone https://aur.archlinux.org/yay.git "$temp_dir/yay"
        (cd "$temp_dir/yay" && makepkg -si --noconfirm)
        rm -rf "$temp_dir"
        aur_helper="yay"
      fi
      echo "Installing from AUR: ${missing_aur[*]}"
      "$aur_helper" -S --needed --noconfirm "${missing_aur[@]}"
    fi
  elif command -v apt-get &>/dev/null; then
    # Debian/Ubuntu
    echo "Installing tools via apt..."
    sudo apt-get update
    # Map tool names to package names
    local apt_packages=()
    for tool in "${missing_tools[@]}"; do
      case "$tool" in
        ctags) apt_packages+=("universal-ctags") ;;
        ugrep) apt_packages+=("ugrep") ;;
        *) apt_packages+=("$tool") ;;
      esac
    done
    if [ "${#apt_packages[@]}" -gt 0 ]; then
      sudo apt-get install -y "${apt_packages[@]}"
    fi
    # Install tokei/scc via cargo or snap
    for aur_tool in "${missing_aur[@]}"; do
      if command -v cargo &>/dev/null; then
        echo "Installing $aur_tool via cargo..."
        cargo install "$aur_tool"
      elif command -v snap &>/dev/null; then
        echo "Installing $aur_tool via snap..."
        sudo snap install "$aur_tool"
      else
        echo -e "${YELLOW}Warning: Cannot install $aur_tool. Install cargo or snap first.${NC}"
      fi
    done
  elif command -v dnf &>/dev/null; then
    # Fedora
    echo "Installing tools via dnf..."
    sudo dnf install -y "${missing_tools[@]}" "${missing_aur[@]}" 2>/dev/null || {
      # tokei/scc might need cargo
      for aur_tool in "${missing_aur[@]}"; do
        if command -v cargo &>/dev/null; then
          cargo install "$aur_tool"
        fi
      done
    }
  elif command -v brew &>/dev/null; then
    # macOS with Homebrew
    echo "Installing tools via brew..."
    local all_tools=("${missing_tools[@]}" "${missing_aur[@]}")
    brew install "${all_tools[@]}"
  else
    echo -e "${RED}Unknown package manager. Please install these tools manually:${NC}"
    echo " Official: ${missing_tools[*]}"
    echo " Additional: ${missing_aur[*]}"
    exit 1
  fi

  echo -e "${GREEN}Tool installation complete.${NC}"
}
# Driver for steps 0-3: make sure the tools exist, obtain the repository,
# change into it, then record size and statistics under $RESULTS_DIR.
print_header "STEP 0: Checking/Installing Required Tools"
install_missing_tools
# Create directories
mkdir -p "$WORK_DIR" "$RESULTS_DIR"
#==============================================================================
# STEP 1: Clone or Use Local Repository
#==============================================================================
print_header "STEP 1: Repository Setup"
if [ "$IS_LOCAL" = true ]; then
echo "Using local repository: $REPO_DIR"
if [ ! -d "$REPO_DIR" ]; then
echo "Error: Directory does not exist: $REPO_DIR"
exit 1
fi
else
# Remote URL - clone it
if [ -d "$REPO_DIR" ]; then
# A directory left by an earlier run is reused; a failed pull is reported but
# does not stop the analysis.
echo "Repository already exists at $REPO_DIR"
echo "Updating..."
cd "$REPO_DIR"
git pull --depth 1 2>/dev/null || echo "Update skipped (shallow clone)"
else
echo "Cloning $REPO_URL (shallow clone for speed)..."
git clone --depth 1 "$REPO_URL" "$REPO_DIR"
fi
fi
# Everything below runs with the repository root as working directory;
# find_files and the tools invoked later all operate on ".".
cd "$REPO_DIR"
echo "Repository: $REPO_NAME"
echo "Location: $REPO_DIR"
echo "Repository size: $(du -sh . | cut -f1)"
# Report a file count using the same filtering mode the analysis will use.
if [ "$RESPECT_GITIGNORE" = true ] && is_git_repo; then
# Count files respecting .gitignore
FILE_COUNT=$({ git ls-files 2>/dev/null; git ls-files --others --exclude-standard 2>/dev/null; } | sort -u | wc -l)
echo "Files: $FILE_COUNT (respecting .gitignore)"
elif [ "$RESPECT_GITIGNORE" = true ]; then
echo "Files: $(find . -type f 2>/dev/null | grep -Ev "/($EXCLUDE_DIRS)/" | wc -l) (excluding common dirs)"
else
echo "Files: $(find . -type f | wc -l)"
fi
#==============================================================================
# STEP 2: Basic Statistics with tokei
#==============================================================================
print_header "STEP 2: Code Statistics with tokei"
echo "Running tokei..."
# tee shows the report and saves a copy; since pipefail is not enabled, the
# pipeline's status is tee's, so a failing tokei does not abort the script.
tokei . | tee "$RESULTS_DIR/tokei_stats.txt"
#==============================================================================
# STEP 3: Code Statistics with scc
#==============================================================================
print_header "STEP 3: Code Statistics with scc (includes complexity)"
echo "Running scc..."
scc . | tee "$RESULTS_DIR/scc_stats.txt"
print_subheader "Top 10 Most Complex Files"
# Keeps the first 20 lines of scc's per-file report sorted by complexity.
# NOTE(review): the heading says "Top 10"; presumably the extra lines cover
# scc's table header - confirm against scc's output format.
scc --by-file --sort complexity . 2>/dev/null | head -20 | tee "$RESULTS_DIR/scc_complexity.txt"
#==============================================================================
# STEP 4: Fast Keyword Analysis (Code vs Comments) - Multi-Language
#==============================================================================
print_header "STEP 4: Fast Keyword Analysis (Code vs Comments)"
#######################################
# Helper function for fast word counting: read lines from stdin and print the
# most frequent ones.
# Uses 'counts' (Rust) if available, falls back to Python Counter.
# Arguments:
#   $1 - number of results to print (default 50)
# Outputs:
#   Python fallback: "count line" pairs, most frequent first.
#   counts: lines 2..(top_n + 1) of its output when it prints more than top_n
#   lines, otherwise its last top_n lines (presumably line 1 is a summary
#   header - confirm against the counts tool).
#######################################
fast_count() {
  local top_n="${1:-50}"
  if command -v counts &>/dev/null; then
    counts 2>/dev/null | head -n "$((top_n + 1))" | tail -n "$top_n"
  else
    # top_n is passed as an argument rather than spliced into the program
    # text. Undecodable bytes (source trees are not always valid UTF-8) are
    # replaced instead of raising UnicodeDecodeError and losing all output.
    python3 -c '
import sys
from collections import Counter

for stream in (sys.stdin, sys.stdout):
    stream.reconfigure(errors="replace")

limit = int(sys.argv[1])
tally = Counter(line.rstrip() for line in sys.stdin)
for word, count in tally.most_common(limit):
    print(f"{count} {word}")
' "$top_n"
  fi
}
#------------------------------------------------------------------------------
# Language Detection and Configuration
#------------------------------------------------------------------------------
print_subheader "Detecting languages in repository..."
if [ "$RESPECT_GITIGNORE" = true ]; then
  if is_git_repo; then
    echo -e "${YELLOW}Note: Respecting .gitignore (excludes node_modules, build outputs, etc.)${NC}"
  else
    echo -e "${YELLOW}Note: Excluding common directories (node_modules, .git, vendor, etc.)${NC}"
  fi
  echo " Use --no-ignore to include everything."
  echo ""
fi

# Count files by extension to detect primary languages (using helper)
declare -A LANG_FILES
LANG_FILES[c]=$(count_files "*.c")
LANG_FILES[cpp]=$(count_files "*.cpp" "*.cc" "*.cxx")
LANG_FILES[h]=$(count_files "*.h" "*.hpp" "*.hxx")
LANG_FILES[python]=$(count_files "*.py")
# *.jsx is included because the JavaScript processing step reads *.js and
# *.jsx; counting only *.js meant a JSX-only repository was never processed.
LANG_FILES[javascript]=$(count_files "*.js" "*.jsx")
LANG_FILES[typescript]=$(count_files "*.ts" "*.tsx")
LANG_FILES[java]=$(count_files "*.java")
LANG_FILES[go]=$(count_files "*.go")
LANG_FILES[rust]=$(count_files "*.rs")
LANG_FILES[ruby]=$(count_files "*.rb")
LANG_FILES[shell]=$(count_files "*.sh" "*.bash")

echo "Files found by language:"
for lang in c cpp h python javascript typescript java go rust ruby shell; do
  count=${LANG_FILES[$lang]}
  if [ "$count" -gt 0 ]; then
    echo " $lang: $count files"
  fi
done

# Determine which language families are present. Each flag holds the command
# name "true" or "false" so later code can test it with `if $HAS_X; then`.
HAS_C_FAMILY=false
HAS_PYTHON=false
HAS_JS_FAMILY=false
HAS_SHELL=false
HAS_RUBY=false
HAS_GO=false
HAS_RUST=false
HAS_JAVA=false
if (( ${LANG_FILES[c]} + ${LANG_FILES[cpp]} + ${LANG_FILES[h]} > 0 )); then HAS_C_FAMILY=true; fi
if (( ${LANG_FILES[python]} > 0 )); then HAS_PYTHON=true; fi
if (( ${LANG_FILES[javascript]} + ${LANG_FILES[typescript]} > 0 )); then HAS_JS_FAMILY=true; fi
if (( ${LANG_FILES[shell]} > 0 )); then HAS_SHELL=true; fi
if (( ${LANG_FILES[ruby]} > 0 )); then HAS_RUBY=true; fi
if (( ${LANG_FILES[go]} > 0 )); then HAS_GO=true; fi
if (( ${LANG_FILES[rust]} > 0 )); then HAS_RUST=true; fi
if (( ${LANG_FILES[java]} > 0 )); then HAS_JAVA=true; fi
#------------------------------------------------------------------------------
# Language-specific keyword definitions
#------------------------------------------------------------------------------
# Each KEYWORDS_* value is a "|"-separated alternation that the keyword
# analysis wraps in \b( ... )\b and hands to ugrep.

# C/C++ keywords
KEYWORDS_C="auto|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|goto|if|int|long|register|return|short|signed|sizeof|static|struct|switch|typedef|union|unsigned|void|volatile|while|inline|restrict|_Bool|_Complex|_Imaginary"
KEYWORDS_CPP="$KEYWORDS_C|alignas|alignof|and|and_eq|asm|atomic_cancel|atomic_commit|atomic_noexcept|bitand|bitor|bool|catch|char16_t|char32_t|char8_t|class|co_await|co_return|co_yield|compl|concept|const_cast|consteval|constexpr|constinit|decltype|delete|dynamic_cast|explicit|export|false|friend|mutable|namespace|new|noexcept|not|not_eq|nullptr|operator|or|or_eq|override|private|protected|public|reflexpr|reinterpret_cast|requires|static_assert|static_cast|synchronized|template|this|thread_local|throw|true|try|typeid|typename|using|virtual|wchar_t|xor|xor_eq"
# Python keywords
KEYWORDS_PYTHON="False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield"
# JavaScript/TypeScript keywords
KEYWORDS_JS="abstract|arguments|await|boolean|break|byte|case|catch|char|class|const|continue|debugger|default|delete|do|double|else|enum|eval|export|extends|false|final|finally|float|for|function|goto|if|implements|import|in|instanceof|int|interface|let|long|native|new|null|package|private|protected|public|return|short|static|super|switch|synchronized|this|throw|throws|transient|true|try|typeof|undefined|var|void|volatile|while|with|yield"
KEYWORDS_TS="$KEYWORDS_JS|any|as|asserts|bigint|declare|get|infer|intrinsic|is|keyof|module|namespace|never|out|override|readonly|require|set|string|symbol|type|unique|unknown"
# Go keywords
KEYWORDS_GO="break|case|chan|const|continue|default|defer|else|fallthrough|for|func|go|goto|if|import|interface|map|package|range|return|select|struct|switch|type|var"
# Rust keywords
KEYWORDS_RUST="as|async|await|break|const|continue|crate|dyn|else|enum|extern|false|fn|for|if|impl|in|let|loop|match|mod|move|mut|pub|ref|return|self|Self|static|struct|super|trait|true|type|unsafe|use|where|while"
# Ruby keywords
KEYWORDS_RUBY="BEGIN|END|alias|and|begin|break|case|class|def|defined|do|else|elsif|end|ensure|false|for|if|in|module|next|nil|not|or|redo|rescue|retry|return|self|super|then|true|undef|unless|until|when|while|yield"
# Shell keywords (Bash reserved words plus common builtins). The keyword
# analysis reads this variable; while it was undefined, shell keyword
# analysis was skipped without any message.
KEYWORDS_SHELL="if|then|elif|else|fi|case|esac|for|select|while|until|do|done|in|function|time|coproc|break|continue|return|exit|local|declare|readonly|export|unset|set|shift|source|eval|exec|trap|read|echo|printf|test|cd"
# Java keywords (likewise previously undefined, so Java was skipped too)
KEYWORDS_JAVA="abstract|assert|boolean|break|byte|case|catch|char|class|const|continue|default|do|double|else|enum|extends|false|final|finally|float|for|goto|if|implements|import|instanceof|int|interface|long|native|new|null|package|private|protected|public|record|return|short|static|strictfp|super|switch|synchronized|this|throw|throws|transient|true|try|var|void|volatile|while|yield"
#------------------------------------------------------------------------------
# Multi-language comment processing - KEEP LANGUAGES SEPARATE
#------------------------------------------------------------------------------
print_subheader "Processing source files (separating code from comments)..."

# Create per-language output directory
mkdir -p "$RESULTS_DIR/per_language"

# COMMENTS_TEMP collects comment text from every language. LANG_CODE_FILES
# maps a language key to a temp file holding that language's concatenated
# sources with comments stripped.
COMMENTS_TEMP=$(mktemp)
declare -A LANG_CODE_FILES

# Remove only the temp files this run created. A /tmp/code_*.tmp glob would
# also delete the files of another analysis running at the same time.
cleanup_temp_files() {
  local tmp_file
  for tmp_file in "${LANG_CODE_FILES[@]}"; do
    rm -f -- "$tmp_file" "$tmp_file.clean" 2>/dev/null
  done
  rm -f -- "$COMMENTS_TEMP" 2>/dev/null
}
trap cleanup_temp_files EXIT

# Concatenate at most $2 files matching the remaining glob arguments into a
# fresh temp file and register it as LANG_CODE_FILES[$1].
# File names are handed to xargs NUL-delimited so names containing spaces or
# quotes survive. Unreadable files are skipped: without "|| true" the
# non-zero xargs status would stop the whole script under `set -e`.
collect_sources() {
  local lang="$1" limit="$2"
  shift 2
  LANG_CODE_FILES[$lang]=$(mktemp "/tmp/code_${lang}.XXXXXX.tmp")
  find_files "$@" | head -n "$limit" | tr '\n' '\0' \
    | xargs -0 cat 2>/dev/null > "${LANG_CODE_FILES[$lang]}" || true
}

# Append the text of every /* ... */ and // comment found in file $1 to
# COMMENTS_TEMP, then rewrite the file with those comments removed.
split_c_style_comments() {
  local src="$1"
  perl -0777 -ne 'while (/\/\*(.+?)\*\//gs) { print "$1\n"; } while (/\/\/([^\n]*)/g) { print "$1\n"; }' "$src" >> "$COMMENTS_TEMP"
  perl -0777 -pe 's|/\*.*?\*/||gs; s|//[^\n]*||g;' "$src" > "$src.clean"
  mv "$src.clean" "$src"
}

# Process C/C++ files (*.hxx included: it is counted as a header during
# language detection, so it has to be read here as well)
if $HAS_C_FAMILY; then
  echo "Processing C/C++ files..."
  collect_sources c_cpp 15000 "*.c" "*.cpp" "*.cc" "*.cxx" "*.h" "*.hpp" "*.hxx"
  split_c_style_comments "${LANG_CODE_FILES[c_cpp]}"
fi

# Process JavaScript files (separate from TypeScript)
if $HAS_JS_FAMILY; then
  echo "Processing JavaScript files..."
  collect_sources javascript 15000 "*.js" "*.jsx"
  echo "Processing TypeScript files..."
  collect_sources typescript 15000 "*.ts" "*.tsx"
  # Extract and strip comments from both; an empty file has nothing to split
  for lang_file in "${LANG_CODE_FILES[javascript]}" "${LANG_CODE_FILES[typescript]}"; do
    [ -s "$lang_file" ] || continue
    split_c_style_comments "$lang_file"
  done
fi

# Process Python files: "#" comments plus triple-quoted blocks
# (\x27 is the single-quote character, so \x27{3} matches ''')
if $HAS_PYTHON; then
  echo "Processing Python files..."
  collect_sources python 15000 "*.py"
  perl -ne 'if (/^\s*#(.*)/) { print "$1\n"; } elsif (/#(.*)$/) { print "$1\n"; }' "${LANG_CODE_FILES[python]}" >> "$COMMENTS_TEMP"
  perl -0777 -ne 'while (/"""(.+?)"""/gs) { print "$1\n"; } while (/\x27{3}(.+?)\x27{3}/gs) { print "$1\n"; }' "${LANG_CODE_FILES[python]}" >> "$COMMENTS_TEMP"
  perl -pe 's/#.*$//' "${LANG_CODE_FILES[python]}" \
    | perl -0777 -pe 's/""".*?"""//gs; s/\x27{3}.*?\x27{3}//gs' > "${LANG_CODE_FILES[python]}.clean"
  mv "${LANG_CODE_FILES[python]}.clean" "${LANG_CODE_FILES[python]}"
fi

# Process Go files
if $HAS_GO; then
  echo "Processing Go files..."
  collect_sources go 15000 "*.go"
  split_c_style_comments "${LANG_CODE_FILES[go]}"
fi

# Process Rust files
if $HAS_RUST; then
  echo "Processing Rust files..."
  collect_sources rust 15000 "*.rs"
  split_c_style_comments "${LANG_CODE_FILES[rust]}"
fi

# Process Ruby files: "#" comments plus =begin ... =end blocks
if $HAS_RUBY; then
  echo "Processing Ruby files..."
  collect_sources ruby 5000 "*.rb"
  perl -ne 'if (/#(.*)$/) { print "$1\n"; }' "${LANG_CODE_FILES[ruby]}" >> "$COMMENTS_TEMP"
  perl -0777 -ne 'while (/=begin(.+?)=end/gs) { print "$1\n"; }' "${LANG_CODE_FILES[ruby]}" >> "$COMMENTS_TEMP"
  perl -pe 's/#.*$//' "${LANG_CODE_FILES[ruby]}" \
    | perl -0777 -pe 's/=begin.*?=end//gs' > "${LANG_CODE_FILES[ruby]}.clean"
  mv "${LANG_CODE_FILES[ruby]}.clean" "${LANG_CODE_FILES[ruby]}"
fi

# Process Shell files: "#" comments; a "#!" line at the start of a line is
# not collected by the first pattern
if $HAS_SHELL; then
  echo "Processing Shell files..."
  collect_sources shell 5000 "*.sh" "*.bash"
  perl -ne 'if (/^\s*#(.*)/ && !/^#!/) { print "$1\n"; } elsif (/#(.*)$/) { print "$1\n"; }' "${LANG_CODE_FILES[shell]}" >> "$COMMENTS_TEMP"
  perl -pe 's/#.*$//' "${LANG_CODE_FILES[shell]}" > "${LANG_CODE_FILES[shell]}.clean"
  mv "${LANG_CODE_FILES[shell]}.clean" "${LANG_CODE_FILES[shell]}"
fi

# Process Java files
if $HAS_JAVA; then
  echo "Processing Java files..."
  collect_sources java 15000 "*.java"
  split_c_style_comments "${LANG_CODE_FILES[java]}"
fi

COMMENT_LINES=$(wc -l < "$COMMENTS_TEMP")
echo ""
echo "Processed languages: ${!LANG_CODE_FILES[*]}"
echo "Total comment lines: $COMMENT_LINES"
#------------------------------------------------------------------------------
# Per-Language Keyword Analysis - Each language gets its own file
#------------------------------------------------------------------------------
print_subheader "Per-Language Keyword Analysis"

# Map language names to keyword variables
declare -A LANG_KEYWORDS
LANG_KEYWORDS=(
  [c_cpp]="$KEYWORDS_CPP"
  [python]="$KEYWORDS_PYTHON"
  [javascript]="$KEYWORDS_JS"
  [typescript]="$KEYWORDS_TS"
  [go]="$KEYWORDS_GO"
  [rust]="$KEYWORDS_RUST"
  [ruby]="$KEYWORDS_RUBY"
  [shell]="$KEYWORDS_SHELL"
  [java]="$KEYWORDS_JAVA"
)

# Analyze each language separately: tally whole-word keyword hits in the
# comment-free code and save the top 50 to per_language/keywords_<lang>.txt.
for lang in "${!LANG_CODE_FILES[@]}"; do
  code_file="${LANG_CODE_FILES[$lang]}"
  keywords="${LANG_KEYWORDS[$lang]}"
  output_file="$RESULTS_DIR/per_language/keywords_${lang}.txt"

  # Nothing to do without a non-empty code file and a keyword list.
  if [ ! -f "$code_file" ] || [ ! -s "$code_file" ] || [ -z "$keywords" ]; then
    continue
  fi

  echo ""
  echo -e "${YELLOW}=== $lang Keywords ===${NC}"
  ugrep -o "\b($keywords)\b" "$code_file" 2>/dev/null | fast_count 50 | tee "$output_file"
done
#------------------------------------------------------------------------------
# Per-Language Function Analysis
#------------------------------------------------------------------------------
print_subheader "Per-Language Function Calls"

# For every processed language, tally identifiers that are directly followed
# by "(" (ignoring a few control-flow words) and save the top 30 to
# per_language/functions_<lang>.txt.
for lang in "${!LANG_CODE_FILES[@]}"; do
  code_file="${LANG_CODE_FILES[$lang]}"
  output_file="$RESULTS_DIR/per_language/functions_${lang}.txt"

  # Skip languages whose code file is missing or empty.
  if [ ! -f "$code_file" ] || [ ! -s "$code_file" ]; then
    continue
  fi

  echo ""
  echo -e "${YELLOW}=== $lang Functions ===${NC}"
  ugrep -o '\b[a-zA-Z_][a-zA-Z0-9_]*\s*\(' "$code_file" 2>/dev/null |
    sed 's/\s*(//' |
    grep -vE '^(if|for|while|switch|catch|elif)$' |
    fast_count 30 |
    tee "$output_file"
done
#------------------------------------------------------------------------------
# Per-Language Import Analysis
#------------------------------------------------------------------------------
print_subheader "Per-Language Imports/Includes"

# Print and save the 30 most frequent import-like statements of one language.
#   $1 - language key in LANG_CODE_FILES (also names imports_<key>.txt)
#   $2 - title shown between "===" markers
#   $3 - ugrep pattern selecting the statements
#   $4 - "gap" to print a blank line before the title
#   $5 - "trim" to strip leading whitespace from each match before counting
# Does nothing when the language has no non-empty code file.
_report_imports() {
  local lang="$1" title="$2" pattern="$3" gap="$4" trim="$5"
  local src="${LANG_CODE_FILES[$lang]}"
  local dest="$RESULTS_DIR/per_language/imports_${lang}.txt"

  if [ -z "$src" ] || [ ! -s "$src" ]; then
    return 0
  fi

  if [ "$gap" = "gap" ]; then
    echo ""
  fi
  echo -e "${YELLOW}=== ${title} ===${NC}"

  if [ "$trim" = "trim" ]; then
    ugrep -o "$pattern" "$src" 2>/dev/null | sed 's/^\s*//' | fast_count 30 | tee "$dest"
  else
    ugrep -o "$pattern" "$src" 2>/dev/null | fast_count 30 | tee "$dest"
  fi
}

# C/C++ includes
_report_imports c_cpp "C/C++ Includes" '#include\s*[<"][^>"]+[>"]' nogap notrim
# Python imports
_report_imports python "Python Imports" '^\s*(from\s+\S+\s+import\s+\S+|import\s+\S+)' gap trim
# JavaScript imports
_report_imports javascript "JavaScript Imports" "(import\s+.*\s+from\s+['\"][^'\"]+['\"]|require\s*\(['\"][^'\"]+['\"]\))" gap notrim
# TypeScript imports
_report_imports typescript "TypeScript Imports" "(import\s+.*\s+from\s+['\"][^'\"]+['\"]|require\s*\(['\"][^'\"]+['\"]\))" gap notrim
# Go imports
_report_imports go "Go Imports" '"[^"]+/[^"]+"' gap notrim
# Rust use statements
_report_imports rust "Rust Use Statements" '^\s*use\s+[^;]+' gap trim
# Java imports
_report_imports java "Java Imports" '^\s*import\s+[^;]+' gap trim
# Ruby requires
_report_imports ruby "Ruby Requires" "(require\s+['\"][^'\"]+['\"]|require_relative\s+['\"][^'\"]+['\"])" gap notrim
# Shell sources
_report_imports shell "Shell Sources" '(source\s+[^\s]+|\.\s+[^\s]+)' gap notrim
#------------------------------------------------------------------------------
# Combined Analysis (for overview/backward compatibility)
#------------------------------------------------------------------------------
print_subheader "Combined Code Identifiers (all languages)"

# Create combined CODE_TEMP: every language's comment-free code in one file
CODE_TEMP=$(mktemp)
for lang_file in "${LANG_CODE_FILES[@]}"; do
  if [ -f "$lang_file" ]; then
    cat "$lang_file" >> "$CODE_TEMP"
  fi
done

# Top TOP_N identifiers across all code
ugrep -o '\b[a-zA-Z_][a-zA-Z0-9_]*\b' "$CODE_TEMP" 2>/dev/null |
  fast_count "$TOP_N" |
  tee "$RESULTS_DIR/code_identifiers.txt"

print_subheader "Most Used Words in COMMENTS"

# Same tally over the collected comment text
ugrep -o '\b[a-zA-Z_][a-zA-Z0-9_]*\b' "$COMMENTS_TEMP" 2>/dev/null |
  fast_count "$TOP_N" |
  tee "$RESULTS_DIR/comment_words.txt"
# Create combined files from per-language analysis (for backward compatibility)

# Write two header lines followed by the 100 highest-count lines gathered
# from per_language/<kind>_*.txt into the given destination file.
#   $1 - file-name prefix (keywords, functions, imports)
#   $2, $3 - header lines
#   $4 - destination file
_write_combined() {
  local kind="$1" header_one="$2" header_two="$3" dest="$4"
  {
    echo "$header_one"
    echo "$header_two"
    cat "$RESULTS_DIR/per_language"/"${kind}"_*.txt 2>/dev/null | grep -v '^$' | sort -t' ' -k1 -nr | head -100
  } > "$dest"
}

_write_combined keywords \
  "# Combined keywords from all languages" \
  "# Format: count keyword (from per_language/keywords_*.txt)" \
  "$RESULTS_DIR/grep_keywords.txt"

_write_combined functions \
  "# Combined functions from all languages" \
  "# See per_language/functions_*.txt for language-specific breakdown" \
  "$RESULTS_DIR/grep_function_calls.txt"

_write_combined imports \
  "# Combined imports from all languages" \
  "# See per_language/imports_*.txt for language-specific breakdown" \
  "$RESULTS_DIR/grep_imports.txt"
# List what per-language files were created.
# A glob is used instead of parsing `ls -la`: the old awk '$NF' approach also
# listed "." and ".." and truncated file names containing spaces.
echo ""
echo "Per-language analysis files created:"
for created_file in "$RESULTS_DIR/per_language"/*; do
  # An unmatched glob stays literal; skip it rather than print a bogus entry
  [ -e "$created_file" ] || continue
  echo " ${created_file##*/}"
done
print_subheader "Generating tags (this may take a while)..."
# Generate tags for different kinds. Best effort: a missing or failing ctags
# must not abort the run, the section below is simply skipped.
ctags -R --languages=C,C++ --c-kinds=+fp --fields=+lK -f "$RESULTS_DIR/tags" . 2>/dev/null || true
if [ -f "$RESULTS_DIR/tags" ]; then
  # Count the lines that do not start with '!'.
  # grep -c prints "0" itself when nothing matches but also exits 1, so the
  # former `|| echo "0"` produced the two-line value "0<newline>0".
  TOTAL_TAGS=$(grep -ac '^[^!]' "$RESULTS_DIR/tags" 2>/dev/null || true)
  TOTAL_TAGS=${TOTAL_TAGS:-0}
  echo "Total symbols found: $TOTAL_TAGS"

  print_subheader "Most Common Symbol Names"
  # Fast: use cut + counts instead of awk + sort | uniq
  # -a flag treats tags file as text (may contain binary-like patterns)
  grep -a '^[^!]' "$RESULTS_DIR/tags" | cut -f1 | fast_count $TOP_N \
    | tee "$RESULTS_DIR/ctags_symbols.txt"

  print_subheader "Symbol Types Distribution"
  # Fast: extract single-letter kind code after ;" and count
  grep -aoP ';"\t\K[a-z]' "$RESULTS_DIR/tags" 2>/dev/null | fast_count 20 | while read -r count kind; do
    # Translate the one-letter kind code into a readable label
    case "$kind" in
      f) echo "$count functions" ;;
      v) echo "$count variables" ;;
      s) echo "$count structs" ;;
      t) echo "$count typedefs" ;;
      e) echo "$count enum values" ;;
      g) echo "$count enums" ;;
      m) echo "$count struct/union members" ;;
      d) echo "$count macro definitions" ;;
      p) echo "$count function prototypes" ;;
      u) echo "$count unions" ;;
      c) echo "$count classes" ;;
      n) echo "$count namespaces" ;;
      *) echo "$count kind=$kind" ;;
    esac
  done | tee "$RESULTS_DIR/ctags_kinds.txt"
fi
#==============================================================================
# STEP 6: cscope Analysis
#==============================================================================
print_header "STEP 6: cscope Database Analysis"
print_subheader "Building cscope database..."
# Find all C source files (respecting .gitignore if available)
if [ "$RESPECT_GITIGNORE" = true ] && is_git_repo; then
  # Tracked files plus untracked-but-not-ignored files, de-duplicated
  { git ls-files -- '*.c' '*.h' 2>/dev/null; git ls-files --others --exclude-standard -- '*.c' '*.h' 2>/dev/null; } | sort -u > "$RESULTS_DIR/cscope.files"
elif [ "$RESPECT_GITIGNORE" = true ]; then
  # grep -v exits 1 when it selects nothing (no C files outside the excluded
  # dirs); that is a valid empty result and must not abort under errexit.
  find . \( -name "*.c" -o -name "*.h" \) -type f 2>/dev/null | grep -Ev "/($EXCLUDE_DIRS)/" > "$RESULTS_DIR/cscope.files" || true
else
  find . \( -name "*.c" -o -name "*.h" \) -type f > "$RESULTS_DIR/cscope.files" 2>/dev/null
fi
FILE_COUNT=$(wc -l < "$RESULTS_DIR/cscope.files")
echo "Found $FILE_COUNT source files"
# Build cscope database (can take a while for large repos)
echo "Building database (this may take several minutes for Linux kernel)..."
cscope -b -q -i "$RESULTS_DIR/cscope.files" -f "$RESULTS_DIR/cscope.out" 2>/dev/null || true
if [ -f "$RESULTS_DIR/cscope.out" ]; then
  echo "Database built successfully"
  echo "Database size: $(du -sh "$RESULTS_DIR/cscope.out" | cut -f1)"

  # The status of `cscope | head` is head's, so the former `|| echo "No results"`
  # could not fire on an empty result. Capture the output and test it instead.
  print_subheader "Example: Finding callers of 'printk' function"
  cscope_hits=$(cscope -d -f "$RESULTS_DIR/cscope.out" -L -3 printk 2>/dev/null | head -20) || true
  echo "${cscope_hits:-No results}"

  print_subheader "Example: Finding definition of 'struct file'"
  cscope_hits=$(cscope -d -f "$RESULTS_DIR/cscope.out" -L -1 "struct file" 2>/dev/null | head -10) || true
  echo "${cscope_hits:-No results}"
fi
#==============================================================================
# STEP 7: clang AST Analysis (if available)
#==============================================================================
print_header "STEP 7: clang-based Analysis (AST-level)"
print_subheader "Analyzing a sample file with clang AST dump"
# Find a simple C file to analyze (respecting .gitignore)
if [ "$RESPECT_GITIGNORE" = true ] && is_git_repo; then
  # First of the first 20 tracked .c files that exists and is under 50 KiB
  SAMPLE_FILE=$(git ls-files -- '*.c' 2>/dev/null | head -20 | while read -r f; do
    [ -f "$f" ] && [ "$(stat -c%s "$f" 2>/dev/null || echo 999999)" -lt 51200 ] && echo "$f"
  done | head -1)
elif [ "$RESPECT_GITIGNORE" = true ]; then
  SAMPLE_FILE=$(find . -name "*.c" -size -50k -type f 2>/dev/null | grep -Ev "/($EXCLUDE_DIRS)/" | head -1)
else
  SAMPLE_FILE=$(find . -name "*.c" -size -50k 2>/dev/null | head -1)
fi
if [ -n "$SAMPLE_FILE" ]; then
  echo "Sample file: $SAMPLE_FILE"
  echo ""
  echo "Function declarations in this file:"
  clang -Xclang -ast-dump -fsyntax-only "$SAMPLE_FILE" 2>/dev/null \
    | grep -E "FunctionDecl.*<.*>" \
    | head -20 \
    | sed 's/.*FunctionDecl.*<[^>]*> / /' \
    | tee "$RESULTS_DIR/clang_sample_functions.txt" || true
  # Without pipefail the pipeline status is tee's, so a failed clang run was
  # never reported by the old `|| echo`. An empty result file is the reliable
  # signal that nothing could be extracted.
  if [ ! -s "$RESULTS_DIR/clang_sample_functions.txt" ]; then
    echo "Analysis failed (missing headers)"
  fi
fi
print_subheader "Note: Full clang analysis requires compile_commands.json"
echo "For proper AST analysis of the Linux kernel, you need to:"
echo " 1. Configure the kernel: make defconfig"
echo " 2. Generate compile_commands.json: make compile_commands.json"
echo " 3. Use clang-query or clang-check with the database"
#==============================================================================
# STEP 8: Summary
#==============================================================================
print_header "ANALYSIS COMPLETE"
echo "Results saved to: $RESULTS_DIR/"
echo ""
ls -la "$RESULTS_DIR/"
echo ""
echo -e "${GREEN}Quick Summary:${NC}"
echo ""
# The combined grep_*.txt files start with "#" header lines. A plain `head -5`
# printed those headers as if they were results, so each awk program skips
# comment and blank lines and stops after five real entries.
if [ -f "$RESULTS_DIR/grep_keywords.txt" ]; then
  echo "Top 5 Language Keywords (in code):"
  awk '!/^#/ && NF {printf " %s: %s times\n", $2, $1; if (++shown == 5) exit}' "$RESULTS_DIR/grep_keywords.txt"
fi
echo ""
if [ -f "$RESULTS_DIR/grep_function_calls.txt" ]; then
  echo "Top 5 Function/Method Calls (in code):"
  awk '!/^#/ && NF {printf " %s(): %s times\n", $2, $1; if (++shown == 5) exit}' "$RESULTS_DIR/grep_function_calls.txt"
fi
echo ""
if [ -f "$RESULTS_DIR/comment_words.txt" ]; then
  echo "Top 5 Words in Comments:"
  awk '!/^#/ && NF {printf " %s: %s times\n", $2, $1; if (++shown == 5) exit}' "$RESULTS_DIR/comment_words.txt"
fi
echo ""
if [ -f "$RESULTS_DIR/grep_imports.txt" ]; then
  echo "Top 5 Imports/Includes:"
  # An import may contain spaces: everything after the count is the import text
  awk '!/^#/ && NF {count=$1; $1=""; printf " %s: %s times\n", substr($0,2), count; if (++shown == 5) exit}' "$RESULTS_DIR/grep_imports.txt"
fi
echo ""
echo -e "${BLUE}To explore interactively with cscope (C/C++ only):${NC}"
echo " cd $REPO_DIR && cscope -d -f $RESULTS_DIR/cscope.out"
echo ""
echo -e "${BLUE}To browse tags in vim:${NC}"
echo " cd $REPO_DIR && vim -t main"

View File

@ -0,0 +1,987 @@
#!/bin/bash
# Generate study materials (documentation links + Anki cards) from repo analysis
# Usage: ./generate_study_materials.sh <results_dir> [--top N] [--languages "python,c,js"]
#
# Examples:
# ./generate_study_materials.sh /tmp/repo_analysis/results_myproject
# ./generate_study_materials.sh /tmp/repo_analysis/results_linux --top 20 --languages "c"
# ./generate_study_materials.sh ./results --languages "python,typescript"
set -e
#==============================================================================
# Configuration
#==============================================================================
# First positional argument is the results directory (default: current dir)
RESULTS_DIR="${1:-.}"
TOP_N=30
LANGUAGES="auto" # Will detect from results

# Parse arguments
# `shift` fails when no argument was given at all; that is not an error here
shift || true
while [[ $# -gt 0 ]]; do
  case "$1" in
    --top)
      # Without this check a missing value aborted silently at `shift 2`
      # (set -e) and a non-numeric value broke every later `head -$TOP_N`.
      if [[ $# -lt 2 || ! "$2" =~ ^[0-9]+$ ]]; then
        echo "Error: --top requires a non-negative integer" >&2
        exit 1
      fi
      TOP_N="$2"
      shift 2
      ;;
    --languages)
      if [[ $# -lt 2 || -z "$2" ]]; then
        echo "Error: --languages requires a comma-separated list, e.g. \"python,c\"" >&2
        exit 1
      fi
      LANGUAGES="$2"
      shift 2
      ;;
    *)
      # Unknown arguments are ignored
      shift
      ;;
  esac
done

# Output files
DOCS_FILE="$RESULTS_DIR/documentation_links.md"
ANKI_FILE="$RESULTS_DIR/anki_cards.txt"
LLM_PROMPT_FILE="$RESULTS_DIR/llm_anki_prompt.md"

# Offline documentation setup
OFFLINE_DOCS_DIR="${OFFLINE_DOCS_DIR:-$HOME/.local/share/offline-docs}"
LOOKUP_SCRIPT="$(dirname "$0")/lookup_docs.sh"
USE_OFFLINE_DOCS=false
# Offline docs are used only when both the docs directory and an executable
# lookup script are present
if [ -d "$OFFLINE_DOCS_DIR" ] && [ -x "$LOOKUP_SCRIPT" ]; then
  USE_OFFLINE_DOCS=true
fi

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m'
#==============================================================================
# Offline Documentation Lookup (preferred if available)
#==============================================================================
# lookup_offline TERM LANG [IMPORT_LINE]
#   Asks $LOOKUP_SCRIPT for a local documentation file. With IMPORT_LINE the
#   script is called in --import mode and the first output line starting with
#   "/" is used; otherwise the first "File: ..." line is used.
#   Prints the part of that line before the first "|" and returns 0.
#   Returns 1 when offline docs are disabled or nothing was found.
lookup_offline() {
  local term="$1"
  local lang="$2"
  local import_line="$3" # Optional: full import line for context
  local hit doc_path

  # USE_OFFLINE_DOCS holds the command name `true` or `false`
  $USE_OFFLINE_DOCS || return 1

  if [ -z "$import_line" ]; then
    hit=$("$LOOKUP_SCRIPT" "$term" "$lang" 2>/dev/null | grep "^File:" | head -1 | sed 's/^File: //')
  else
    hit=$("$LOOKUP_SCRIPT" --import "$import_line" "$lang" 2>/dev/null | grep "^/" | head -1)
  fi

  # Keep only the path that precedes the "|" separator
  doc_path=${hit%%|*}
  [ -n "$doc_path" ] || return 1
  echo "$doc_path"
}
#==============================================================================
# Documentation URL Generators (online fallback)
#==============================================================================
# Python documentation
# python_doc_url TERM
#   Prints a documentation URL for a Python keyword, built-in function,
#   module or unittest.mock name; anything else becomes a docs.python.org
#   search URL.
python_doc_url() {
  local term="$1"
  case "$term" in
    # Keywords
    if|else|elif|for|while|try|except|finally|with|as|import|from|def|class|return|yield|raise|pass|break|continue|and|or|not|in|is|lambda|global|nonlocal|assert|del|True|False|None|async|await)
      echo "https://docs.python.org/3/reference/compound_stmts.html"
      ;;
    # Built-in functions
    print|len|range|type|str|int|float|list|dict|set|tuple|bool|open|input|format|sorted|reversed|enumerate|zip|map|filter|any|all|sum|min|max|abs|round|isinstance|issubclass|hasattr|getattr|setattr|delattr|callable|iter|next|super|property|staticmethod|classmethod|vars|dir|help|id|hash|repr|ascii|bin|hex|oct|chr|ord|eval|exec|compile)
      echo "https://docs.python.org/3/library/functions.html#$term"
      ;;
    # pytest is a third-party package, so it has no page below
    # docs.python.org/3/library/; link to its own documentation instead
    pytest)
      echo "https://docs.pytest.org/en/stable/"
      ;;
    # Common standard-library modules
    os|sys|re|json|datetime|collections|itertools|functools|pathlib|subprocess|threading|multiprocessing|asyncio|typing|dataclasses|unittest|logging|argparse|configparser)
      echo "https://docs.python.org/3/library/$term.html"
      ;;
    # Testing
    MagicMock|Mock|patch|PropertyMock)
      echo "https://docs.python.org/3/library/unittest.mock.html"
      ;;
    *)
      echo "https://docs.python.org/3/search.html?q=$term"
      ;;
  esac
}
# JavaScript/TypeScript documentation (MDN)
# js_doc_url TERM
#   Prints the MDN URL for TERM. The first matching group wins, so a name
#   listed in several groups (e.g. `map`, `any`) resolves to the earliest one.
js_doc_url() {
  local term="$1"
  local mdn="https://developer.mozilla.org/en-US"
  local js_ref="$mdn/docs/Web/JavaScript/Reference"
  local globals="$js_ref/Global_Objects"

  case "$term" in
    # Keywords & statements
    if|else|for|while|do|switch|case|break|continue|return|throw|try|catch|finally|function|class|const|let|var|new|this|super|import|export|default|async|await|yield|typeof|instanceof|in|of|delete|void)
      printf '%s\n' "$js_ref/Statements"
      ;;
    # Global objects
    Array|Object|String|Number|Boolean|Symbol|Map|Set|WeakMap|WeakSet|Date|RegExp|Error|Promise|Proxy|Reflect|JSON|Math|Intl)
      printf '%s\n' "$globals/$term"
      ;;
    # Array methods
    map|filter|reduce|forEach|find|findIndex|some|every|includes|indexOf|slice|splice|concat|join|push|pop|shift|unshift|sort|reverse|flat|flatMap)
      printf '%s\n' "$globals/Array/$term"
      ;;
    # String methods
    split|replace|match|search|substring|substr|toLowerCase|toUpperCase|trim|padStart|padEnd|startsWith|endsWith|charAt|charCodeAt)
      printf '%s\n' "$globals/String/$term"
      ;;
    # Promise methods
    then|resolve|reject|all|race|allSettled|any)
      printf '%s\n' "$globals/Promise/$term"
      ;;
    # Common Web APIs (all share the API index page)
    fetch|console|document|window|localStorage|sessionStorage|setTimeout|setInterval|addEventListener|querySelector|querySelectorAll)
      printf '%s\n' "$mdn/docs/Web/API"
      ;;
    *)
      printf '%s\n' "$mdn/search?q=$term"
      ;;
  esac
}
# TypeScript-specific documentation
# ts_doc_url TERM
#   Prints a TypeScript handbook URL for type-system keywords and utility
#   types; every other term is delegated to js_doc_url.
ts_doc_url() {
  local term="$1"
  local handbook="https://www.typescriptlang.org/docs/handbook"

  case "$term" in
    # Type-system keywords
    interface|type|enum|namespace|declare|readonly|abstract|implements|extends|keyof|typeof|infer|as|is|asserts|satisfies|override)
      printf '%s\n' "$handbook/2/everyday-types.html"
      ;;
    # Built-in utility types
    Partial|Required|Readonly|Record|Pick|Omit|Exclude|Extract|NonNullable|ReturnType|Parameters|InstanceType|Awaited)
      printf '%s\n' "$handbook/utility-types.html"
      ;;
    # Runtime features are plain JavaScript
    *)
      js_doc_url "$term"
      ;;
  esac
}
# C documentation
# c_doc_url TERM
#   Prints a cppreference.com URL for a C keyword, standard header name
#   (without ".h") or common library function; anything else becomes a
#   cppreference search URL.
c_doc_url() {
  local term="$1"
  case "$term" in
    # Keywords
    if|else|for|while|do|switch|case|break|continue|return|goto|sizeof|typedef|struct|union|enum|const|static|extern|register|volatile|inline|restrict|_Bool|_Complex|_Imaginary|_Alignas|_Alignof|_Atomic|_Generic|_Noreturn|_Static_assert|_Thread_local)
      echo "https://en.cppreference.com/w/c/keyword/$term"
      ;;
    # Standard library headers
    stdio|stdlib|string|math|time|ctype|stdint|stdbool|stddef|limits|float|errno|assert|signal|setjmp|stdarg|locale)
      echo "https://en.cppreference.com/w/c/header/${term}.h"
      ;;
    # Common functions
    printf|fprintf|sprintf|snprintf|scanf|fscanf|sscanf|fopen|fclose|fread|fwrite|fgets|fputs|fseek|ftell|rewind|fflush)
      echo "https://en.cppreference.com/w/c/io"
      ;;
    # Dynamic memory management
    malloc|calloc|realloc|free)
      echo "https://en.cppreference.com/w/c/memory"
      ;;
    # <string.h>: the mem* functions are documented together with the str*
    # functions on the byte-string page, not on the allocation page
    memcpy|memmove|memset|memcmp|strlen|strcpy|strncpy|strcat|strncat|strcmp|strncmp|strchr|strrchr|strstr|strtok)
      echo "https://en.cppreference.com/w/c/string/byte"
      ;;
    *)
      echo "https://en.cppreference.com/mwiki/index.php?search=$term"
      ;;
  esac
}
# C++ documentation
# cpp_doc_url TERM
#   Prints a cppreference.com URL for C++ keywords, containers, algorithms,
#   smart pointers and common utility classes. Terms not listed here are
#   handed to c_doc_url.
cpp_doc_url() {
  local term="$1"
  local cppref="https://en.cppreference.com/w/cpp"

  case "$term" in
    # C++ specific keywords
    class|public|private|protected|virtual|override|final|explicit|mutable|constexpr|consteval|constinit|concept|requires|co_await|co_yield|co_return|nullptr|noexcept|decltype|auto|template|typename|namespace|using|new|delete|throw|try|catch|static_cast|dynamic_cast|const_cast|reinterpret_cast)
      printf '%s\n' "$cppref/keyword/$term"
      ;;
    # STL containers
    vector|list|deque|array|forward_list|set|map|unordered_set|unordered_map|multiset|multimap|stack|queue|priority_queue)
      printf '%s\n' "$cppref/container/$term"
      ;;
    # STL algorithms
    sort|find|copy|move|transform|accumulate|count|remove|unique|reverse|rotate|shuffle|partition|merge|binary_search|lower_bound|upper_bound)
      printf '%s\n' "$cppref/algorithm/$term"
      ;;
    # Smart pointers
    unique_ptr|shared_ptr|weak_ptr|make_unique|make_shared)
      printf '%s\n' "$cppref/memory/$term"
      ;;
    # Common classes (all share the utility index page)
    string|string_view|optional|variant|any|tuple|pair|function|bind|thread|mutex|future|promise|chrono)
      printf '%s\n' "$cppref/utility"
      ;;
    # Everything else: try the C tables
    *)
      c_doc_url "$term"
      ;;
  esac
}
# Rust documentation
# rust_doc_url TERM
#   Prints a doc.rust-lang.org/std URL for a Rust keyword, common std type,
#   std trait or macro; anything else becomes a std search URL.
#   Types and traits live inside modules in rustdoc
#   (std/<module>/<kind>.<Name>.html), so the bare "std/<Name>" form used
#   before did not point at a page.
rust_doc_url() {
  local term="$1"
  local std="https://doc.rust-lang.org/std"
  case "$term" in
    # Keywords
    fn|let|mut|const|static|if|else|match|loop|while|for|in|break|continue|return|struct|enum|impl|trait|type|where|pub|mod|use|crate|self|super|async|await|move|ref|dyn|unsafe|extern)
      echo "$std/keyword.$term.html"
      ;;
    # Common types, grouped by the module that defines them
    Option) echo "$std/option/enum.Option.html" ;;
    Result) echo "$std/result/enum.Result.html" ;;
    Vec) echo "$std/vec/struct.Vec.html" ;;
    String) echo "$std/string/struct.String.html" ;;
    Box) echo "$std/boxed/struct.Box.html" ;;
    Rc) echo "$std/rc/struct.Rc.html" ;;
    Arc|Mutex|RwLock) echo "$std/sync/struct.$term.html" ;;
    Cell|RefCell) echo "$std/cell/struct.$term.html" ;;
    HashMap|HashSet|BTreeMap|BTreeSet) echo "$std/collections/struct.$term.html" ;;
    # Traits, grouped by the module that defines them
    Clone) echo "$std/clone/trait.Clone.html" ;;
    Copy|Send|Sync) echo "$std/marker/trait.$term.html" ;;
    Debug|Display) echo "$std/fmt/trait.$term.html" ;;
    Default) echo "$std/default/trait.Default.html" ;;
    Eq|PartialEq|Ord|PartialOrd) echo "$std/cmp/trait.$term.html" ;;
    Hash) echo "$std/hash/trait.Hash.html" ;;
    From|Into|AsRef|AsMut) echo "$std/convert/trait.$term.html" ;;
    Deref|DerefMut) echo "$std/ops/trait.$term.html" ;;
    Iterator|IntoIterator) echo "$std/iter/trait.$term.html" ;;
    # Macros
    println|print|format|vec|panic|assert|assert_eq|assert_ne|debug_assert|todo|unimplemented|unreachable)
      echo "$std/macro.$term.html"
      ;;
    *)
      echo "$std/?search=$term"
      ;;
  esac
}
# Go documentation
# go_doc_url TERM
#   Prints the language spec URL for keywords, a pkg.go.dev/builtin anchor
#   for built-in functions, the package page for common standard packages,
#   and a pkg.go.dev search URL for anything else.
go_doc_url() {
  local term="$1"
  case "$term" in
    # Keywords
    func|var|const|type|struct|interface|map|chan|go|select|defer|if|else|for|range|switch|case|default|break|continue|return|goto|fallthrough|package|import)
      echo "https://go.dev/ref/spec"
      ;;
    # Built-in functions
    make|new|len|cap|append|copy|delete|close|panic|recover|print|println|complex|real|imag)
      echo "https://pkg.go.dev/builtin#$term"
      ;;
    # Packages that are nested below another standard package: the bare
    # name is not a valid import path on pkg.go.dev
    http)
      echo "https://pkg.go.dev/net/http"
      ;;
    json)
      echo "https://pkg.go.dev/encoding/json"
      ;;
    filepath)
      echo "https://pkg.go.dev/path/filepath"
      ;;
    # Common top-level packages
    fmt|os|io|net|time|strings|strconv|errors|context|sync|testing|reflect|regexp|sort|math|crypto|encoding|bufio|bytes|path)
      echo "https://pkg.go.dev/$term"
      ;;
    *)
      echo "https://pkg.go.dev/search?q=$term"
      ;;
  esac
}
# Ruby documentation
# ruby_doc_url TERM
#   Prints a ruby-doc.org URL: the keyword index for keywords, a per-class
#   page for core classes, the Enumerable page for enumeration methods and a
#   site search for everything else.
ruby_doc_url() {
  local term="$1"
  local site="https://ruby-doc.org"

  case "$term" in
    # Keywords
    if|else|elsif|unless|case|when|while|until|for|do|end|begin|rescue|ensure|raise|return|break|next|redo|retry|yield|def|class|module|self|super|nil|true|false|and|or|not|in|then|alias|defined|__FILE__|__LINE__|__ENCODING__)
      printf '%s\n' "$site/docs/keywords/1.9/"
      ;;
    # Core classes
    String|Array|Hash|Integer|Float|Symbol|Range|Regexp|Time|Date|File|Dir|IO|Proc|Lambda|Method|Thread|Mutex|Fiber)
      printf '%s\n' "$site/core/classes/$term.html"
      ;;
    # Enumerable methods
    each|map|select|reject|find|reduce|inject|collect|detect|sort|sort_by|group_by|partition|any|all|none|one|count|first|last|take|drop)
      printf '%s\n' "$site/core/Enumerable.html"
      ;;
    *)
      printf '%s\n' "$site/search.html?q=$term"
      ;;
  esac
}
# Java documentation
# java_doc_url TERM
#   Prints the Oracle tutorial URL for keywords, the Java SE 17 API page for
#   common classes and an API search URL for everything else.
#   Classes are routed to the package that declares them: the collection
#   types are in java.util (Stream in java.util.stream), not java.lang, so
#   the former single java/lang URL did not point at a page for them.
java_doc_url() {
  local term="$1"
  local api="https://docs.oracle.com/en/java/javase/17/docs/api"
  case "$term" in
    # Keywords
    if|else|for|while|do|switch|case|break|continue|return|throw|try|catch|finally|class|interface|enum|extends|implements|new|this|super|static|final|abstract|public|private|protected|void|null|true|false|instanceof|synchronized|volatile|transient|native|strictfp|assert|default|package|import)
      echo "https://docs.oracle.com/javase/tutorial/java/nutsandbolts/"
      ;;
    # java.lang classes
    String|Integer|Long|Double|Float|Boolean|Character|Object|Class|System|Math)
      echo "$api/java.base/java/lang/$term.html"
      ;;
    # java.util classes and interfaces
    Arrays|Collections|List|ArrayList|LinkedList|Map|HashMap|TreeMap|Set|HashSet|TreeSet|Queue|Stack|Optional)
      echo "$api/java.base/java/util/$term.html"
      ;;
    # java.util.stream
    Stream)
      echo "$api/java.base/java/util/stream/$term.html"
      ;;
    *)
      echo "$api/search.html?q=$term"
      ;;
  esac
}
# Shell documentation
# shell_doc_url TERM
#   Prints a Bash manual anchor for reserved words and builtins, a man7.org
#   section-1 man page URL for common external commands, and the Bash manual
#   itself for anything else. Groups are tried in order, so `kill` (listed
#   twice) resolves to the builtins anchor.
shell_doc_url() {
  local term="$1"
  local bash_manual="https://www.gnu.org/software/bash/manual/bash.html"

  case "$term" in
    # Reserved words / compound commands
    if|then|else|elif|fi|for|while|until|do|done|case|esac|in|function|select|time|coproc)
      printf '%s\n' "$bash_manual#Conditional-Constructs"
      ;;
    # Builtin commands
    echo|printf|read|declare|local|export|unset|set|shopt|alias|source|eval|exec|exit|return|break|continue|shift|trap|wait|kill|jobs|bg|fg|disown|suspend|logout|cd|pwd|pushd|popd|dirs|type|which|command|builtin|enable|help|hash|bind|complete|compgen|compopt)
      printf '%s\n' "$bash_manual#Shell-Builtin-Commands"
      ;;
    # Common external commands
    grep|sed|awk|find|xargs|sort|uniq|cut|tr|head|tail|wc|cat|tee|diff|patch|tar|gzip|zip|curl|wget|ssh|scp|rsync|git|make|chmod|chown|chgrp|ln|cp|mv|rm|mkdir|rmdir|touch|ls|stat|file|df|du|free|top|ps|kill|pkill|pgrep|nohup|screen|tmux)
      printf '%s\n' "https://man7.org/linux/man-pages/man1/$term.1.html"
      ;;
    *)
      printf '%s\n' "$bash_manual"
      ;;
  esac
}
#==============================================================================
# Get documentation URL for a term based on detected language
#==============================================================================
# get_doc_url TERM LANG [IMPORT_LINE]
#   Prints the offline documentation path when lookup_offline finds one
#   (TypeScript additionally retries with the "js" docs), otherwise the
#   online URL from the matching <lang>_doc_url generator. Unknown languages
#   get a devdocs.io search URL. Always returns 0.
get_doc_url() {
  local term="$1"
  local lang="$2"
  local import_line="$3" # Optional: full import for context
  # Try offline docs first.
  # lookup_offline returns 1 for "nothing found"; `|| true` keeps that normal
  # outcome from aborting the script when errexit is active in this context.
  local offline_result
  offline_result=$(lookup_offline "$term" "$lang" "$import_line") || true
  if [ -n "$offline_result" ]; then
    echo "$offline_result"
    return 0
  fi
  # For TypeScript, also try JavaScript offline docs (most TS keywords are JS)
  if [[ "$lang" == "typescript" || "$lang" == "ts" || "$lang" == "tsx" ]]; then
    offline_result=$(lookup_offline "$term" "js" "$import_line") || true
    if [ -n "$offline_result" ]; then
      echo "$offline_result"
      return 0
    fi
  fi
  # Fall back to online URLs
  case "$lang" in
    python|py)
      python_doc_url "$term"
      ;;
    javascript|js|jsx)
      js_doc_url "$term"
      ;;
    typescript|ts|tsx)
      # For TypeScript, try JS doc first (since most keywords are shared)
      # Only use TS-specific docs for TS-only features
      case "$term" in
        interface|type|enum|namespace|declare|readonly|abstract|implements|keyof|infer|as|is|asserts|satisfies|override|Partial|Required|Readonly|Record|Pick|Omit|Exclude|Extract|NonNullable|ReturnType|Parameters|InstanceType|Awaited)
          ts_doc_url "$term"
          ;;
        *)
          js_doc_url "$term"
          ;;
      esac
      ;;
    c)
      c_doc_url "$term"
      ;;
    cpp|c++|cc|cxx)
      cpp_doc_url "$term"
      ;;
    rust|rs)
      rust_doc_url "$term"
      ;;
    go)
      go_doc_url "$term"
      ;;
    ruby|rb)
      ruby_doc_url "$term"
      ;;
    java)
      java_doc_url "$term"
      ;;
    shell|bash|sh)
      shell_doc_url "$term"
      ;;
    *)
      echo "https://devdocs.io/#q=$term"
      ;;
  esac
}
#==============================================================================
# Detect primary language from results
#==============================================================================
# detect_language
#   Prints the lower-cased name of the first supported language row found in
#   $RESULTS_DIR/tokei_stats.txt ("C++" becomes "cpp"). Prints "unknown" when
#   the file is missing or contains no supported language, so callers never
#   receive an empty language name.
#   NOTE(review): the first matching row is used; whether that is the
#   most-used language depends on how the tokei output was sorted - confirm.
detect_language() {
  local detected=""
  if [ -f "$RESULTS_DIR/tokei_stats.txt" ]; then
    detected=$(grep -E "^\s+(Python|JavaScript|TypeScript|C\+\+|C |Rust|Go|Ruby|Java|Shell)" "$RESULTS_DIR/tokei_stats.txt" 2>/dev/null \
      | head -1 \
      | awk '{print tolower($1)}' \
      | sed 's/c++/cpp/') || true
  fi
  echo "${detected:-unknown}"
}
#==============================================================================
# Main Processing
#==============================================================================
# Nothing can be generated without an existing results directory
if [[ ! -d "$RESULTS_DIR" ]]; then
  echo -e "${RED}Error: Results directory not found: $RESULTS_DIR${NC}"
  echo "Run analyze_repo.sh first to generate analysis results."
  exit 1
fi

# PRIMARY_LANG: auto-detected, or the first entry of the --languages list
case "$LANGUAGES" in
  auto)
    PRIMARY_LANG=$(detect_language)
    echo -e "${BLUE}Detected primary language: ${GREEN}$PRIMARY_LANG${NC}"
    ;;
  *)
    PRIMARY_LANG=${LANGUAGES%%,*}
    echo -e "${BLUE}Using specified language: ${GREEN}$PRIMARY_LANG${NC}"
    ;;
esac

# Banner
echo
echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
echo -e "${GREEN} Generating Study Materials${NC}"
echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
echo
# Documentation links are built from the per-language result files when they
# exist; the combined grep_*.txt files are only the fallback.
#==============================================================================
# Generate Documentation Links (Markdown)
#==============================================================================
echo -e "${YELLOW}Generating documentation links...${NC}"
# Start the Markdown file from scratch ('>' truncates); the quoted 'EOF'
# delimiter keeps the header text literal, with no variable expansion.
cat > "$DOCS_FILE" << 'EOF'
# Documentation Links for Code Review
This document contains links to official documentation for the most commonly used
functions, keywords, and patterns found in the analyzed codebase.
**Note:** Items are grouped by language for accurate documentation links.
---
EOF
# Check for per-language files
PER_LANG_DIR="$RESULTS_DIR/per_language"

# Map internal lang names (the <lang> part of per_language/*_<lang>.txt) to
# the language ids understood by get_doc_url
lang_to_doc() {
  case "$1" in
    c_cpp) echo "cpp" ;;
    javascript) echo "js" ;;
    typescript) echo "ts" ;;
    shell) echo "bash" ;;
    *) echo "$1" ;;
  esac
}

# Human-readable language name used in keyword/function section headings
lang_display_name() {
  case "$1" in
    c_cpp) echo "C/C++" ;;
    javascript) echo "JavaScript" ;;
    typescript) echo "TypeScript" ;;
    python) echo "Python" ;;
    rust) echo "Rust" ;;
    go) echo "Go" ;;
    ruby) echo "Ruby" ;;
    java) echo "Java" ;;
    shell) echo "Shell/Bash" ;;
    *) echo "$1" ;;
  esac
}

# Heading for a per-language import table (names the import construct too)
import_display_name() {
  case "$1" in
    c_cpp) echo "C/C++ (#include)" ;;
    javascript) echo "JavaScript (import/require)" ;;
    typescript) echo "TypeScript (import)" ;;
    python) echo "Python (import/from)" ;;
    rust) echo "Rust (use)" ;;
    go) echo "Go (import)" ;;
    ruby) echo "Ruby (require)" ;;
    java) echo "Java (import)" ;;
    shell) echo "Shell (source)" ;;
    *) echo "$1" ;;
  esac
}

# Reduce an import line to a bare module name: the text between <> or ""
# when present, without leading directories and without an extension
import_module_name() {
  echo "$1" | sed -E 's/.*[<"]([^">]+)[">].*/\1/' | sed 's|.*/||' | sed 's/\..*$//'
}

# append_term_table KIND FILE LANG
#   Appends a Markdown table built from the "count term" lines of FILE to
#   $DOCS_FILE. KIND is "keyword" or "function"; function tables append "()"
#   to the name and skip control-flow words picked up as calls.
#   "#" header lines (present in the combined grep_*.txt files) are skipped.
append_term_table() {
  local kind="$1" input="$2" lang="$3"
  local column rule suffix count term url
  case "$kind" in
    function) column="Function"; rule="----------"; suffix="()" ;;
    *) column="Keyword"; rule="---------"; suffix="" ;;
  esac
  {
    echo "| $column | Count | Documentation |"
    echo "|$rule|-------|---------------|"
    grep -v '^#' "$input" | head -n "$TOP_N" | while read -r count term; do
      if [ -z "$term" ]; then
        continue
      fi
      if [ "$kind" = "function" ] && [[ "$term" =~ ^(if|for|while|switch|catch|elif)$ ]]; then
        continue
      fi
      url=$(get_doc_url "$term" "$lang")
      echo "| \`$term$suffix\` | $count | [docs]($url) |"
    done
    echo ""
  } >> "$DOCS_FILE"
}

# append_import_table FILE LANG
#   Appends a Markdown table for the first 20 "count import" lines of FILE.
#   The whole import line is offered to the offline lookup first. Offline
#   hits are file paths starting with "/"; any other answer is an online URL
#   generated for an empty term, so the lookup is repeated with the extracted
#   module name. (The previous check only retried for URLs containing
#   "search.html", leaving empty-query links for most languages.)
append_import_table() {
  local input="$1" lang="$2"
  local count import url module import_escaped
  {
    echo "| Import | Count | Documentation |"
    echo "|--------|-------|---------------|"
    grep -v '^#' "$input" | head -n 20 | while read -r count import; do
      if [ -z "$import" ]; then
        continue
      fi
      url=$(get_doc_url "" "$lang" "$import")
      if [[ "$url" != /* ]]; then
        module=$(import_module_name "$import")
        url=$(get_doc_url "$module" "$lang")
      fi
      # A literal "|" would end the Markdown table cell
      import_escaped=$(echo "$import" | sed 's/|/\\|/g')
      echo "| \`$import_escaped\` | $count | [docs]($url) |"
    done
    echo ""
  } >> "$DOCS_FILE"
}

if [ -d "$PER_LANG_DIR" ]; then
  echo -e "${GREEN}Using per-language analysis files${NC}"

  # Process keywords by language
  echo "## Language Keywords" >> "$DOCS_FILE"
  echo "" >> "$DOCS_FILE"
  for keyword_file in "$PER_LANG_DIR"/keywords_*.txt; do
    # Skip an unmatched glob and empty result files
    if [ ! -f "$keyword_file" ] || [ ! -s "$keyword_file" ]; then
      continue
    fi
    # Extract language name from filename
    lang=$(basename "$keyword_file" | sed 's/keywords_//; s/\.txt//')
    echo "### $(lang_display_name "$lang") Keywords" >> "$DOCS_FILE"
    echo "" >> "$DOCS_FILE"
    append_term_table keyword "$keyword_file" "$(lang_to_doc "$lang")"
  done

  # Process functions by language
  echo "## Function/Method Calls" >> "$DOCS_FILE"
  echo "" >> "$DOCS_FILE"
  for func_file in "$PER_LANG_DIR"/functions_*.txt; do
    if [ ! -f "$func_file" ] || [ ! -s "$func_file" ]; then
      continue
    fi
    lang=$(basename "$func_file" | sed 's/functions_//; s/\.txt//')
    echo "### $(lang_display_name "$lang") Functions" >> "$DOCS_FILE"
    echo "" >> "$DOCS_FILE"
    append_term_table function "$func_file" "$(lang_to_doc "$lang")"
  done

  # Process imports by language
  echo "## Imports/Includes" >> "$DOCS_FILE"
  echo "" >> "$DOCS_FILE"
  for import_file in "$PER_LANG_DIR"/imports_*.txt; do
    if [ ! -f "$import_file" ] || [ ! -s "$import_file" ]; then
      continue
    fi
    lang=$(basename "$import_file" | sed 's/imports_//; s/\.txt//')
    echo "### $(import_display_name "$lang")" >> "$DOCS_FILE"
    echo "" >> "$DOCS_FILE"
    append_import_table "$import_file" "$(lang_to_doc "$lang")"
  done
else
  # Fallback to combined files (old behavior): one table per kind, all terms
  # looked up as $PRIMARY_LANG
  echo -e "${YELLOW}No per-language files found, using combined analysis${NC}"
  if [ -f "$RESULTS_DIR/grep_keywords.txt" ]; then
    echo "## Language Keywords" >> "$DOCS_FILE"
    echo "" >> "$DOCS_FILE"
    append_term_table keyword "$RESULTS_DIR/grep_keywords.txt" "$PRIMARY_LANG"
  fi
  if [ -f "$RESULTS_DIR/grep_function_calls.txt" ]; then
    echo "## Function/Method Calls" >> "$DOCS_FILE"
    echo "" >> "$DOCS_FILE"
    append_term_table function "$RESULTS_DIR/grep_function_calls.txt" "$PRIMARY_LANG"
  fi
  if [ -f "$RESULTS_DIR/grep_imports.txt" ]; then
    echo "## Imports/Includes" >> "$DOCS_FILE"
    echo "" >> "$DOCS_FILE"
    append_import_table "$RESULTS_DIR/grep_imports.txt" "$PRIMARY_LANG"
  fi
fi

# Footer
echo "" >> "$DOCS_FILE"
echo "---" >> "$DOCS_FILE"
echo "*Generated by analyze_repo.sh + generate_study_materials.sh*" >> "$DOCS_FILE"
echo -e "${GREEN}Created: $DOCS_FILE${NC}"
#==============================================================================
# Generate Anki Cards (Tab-separated for import)
#==============================================================================
echo -e "${YELLOW}Generating Anki cards...${NC}"
cat > "$ANKI_FILE" << 'EOF'
# Anki Import File
# Format: Front<TAB>Back<TAB>Tags
# Import with: File -> Import, select "Fields separated by: Tab"
#
# Card Types:
# 1. "What does X do?" - For functions/methods
# 2. "When to use X?" - For keywords/patterns
# 3. "What is the syntax for X?" - For language constructs
#
EOF

# Generate cards for top keywords.
# The combined grep_*.txt files begin with "#" header lines; they are removed
# before taking the top N so they neither become cards nor use up slots.
# printf '%s' is used instead of `echo -e` so backslashes inside a term or
# URL are written verbatim; only the separators are real tabs.
if [ -f "$RESULTS_DIR/grep_keywords.txt" ]; then
  echo "# Keywords" >> "$ANKI_FILE"
  grep -v '^#' "$RESULTS_DIR/grep_keywords.txt" | head -n "$TOP_N" | while read -r count term; do
    if [ -z "$term" ]; then
      continue
    fi
    url=$(get_doc_url "$term" "$PRIMARY_LANG")
    # Known keyword families get a ready-made answer and a sub-tag;
    # everything else becomes a fill-in card.
    answer=""
    tag="${PRIMARY_LANG}::keywords"
    case "$term" in
      if|else|elif|elseif|switch|case|match)
        answer="Conditional control flow - executes code based on boolean conditions."
        tag="${tag}::control-flow"
        ;;
      for|while|loop|do|until)
        answer="Loop construct - repeats code execution."
        tag="${tag}::loops"
        ;;
      try|except|catch|finally|raise|throw)
        answer="Exception handling - manages errors and exceptional conditions."
        tag="${tag}::exceptions"
        ;;
      class|struct|interface|trait|impl)
        answer="Type definition - defines custom data structures."
        tag="${tag}::types"
        ;;
      def|fn|func|function)
        answer="Function definition - declares a reusable block of code."
        tag="${tag}::functions"
        ;;
      import|from|use|require|include)
        answer="Module import - brings external code into current scope."
        tag="${tag}::modules"
        ;;
      async|await|yield)
        answer="Asynchronous programming - handles concurrent operations."
        tag="${tag}::async"
        ;;
    esac
    if [ -n "$answer" ]; then
      printf '%s\t%s\t%s\n' "What is the purpose of \`$term\` in $PRIMARY_LANG?" "$answer See: $url" "$tag"
    else
      printf '%s\t%s\t%s\n' "What does the keyword \`$term\` do in $PRIMARY_LANG?" "[FILL: Look up at $url]" "$tag"
    fi
  done >> "$ANKI_FILE"
fi

# Generate cards for top functions
if [ -f "$RESULTS_DIR/grep_function_calls.txt" ]; then
  echo "" >> "$ANKI_FILE"
  echo "# Functions" >> "$ANKI_FILE"
  grep -v '^#' "$RESULTS_DIR/grep_function_calls.txt" | head -n "$TOP_N" | while read -r count term; do
    if [ -z "$term" ]; then
      continue
    fi
    # Control-flow words followed by "(" are not function calls
    if [[ "$term" =~ ^(if|for|while|switch|catch)$ ]]; then
      continue
    fi
    url=$(get_doc_url "$term" "$PRIMARY_LANG")
    printf '%s\t%s\t%s\n' "What does \`$term()\` do in $PRIMARY_LANG? (Used $count times)" "[FILL: Look up at $url]" "${PRIMARY_LANG}::functions"
  done >> "$ANKI_FILE"
fi
echo -e "${GREEN}Created: $ANKI_FILE${NC}"
#==============================================================================
# Generate LLM Prompt for Anki Card Generation
#==============================================================================
echo -e "${YELLOW}Generating LLM prompt...${NC}"
# Helper function to get doc link for a term
#######################################
# Resolve the offline documentation location for a term or an import line.
# Globals:
#   LOOKUP_SCRIPT (read) - command used to query the offline docs
# Arguments:
#   $1 - term, or a full import statement when $3 is "true"
#   $2 - language name passed through to the lookup command
#   $3 - "true" if $1 is an import statement (defaults to "false")
# Outputs:
#   One line on stdout: the documentation path, "[INTERNAL - SKIP]" for
#   project-local items, or "[NO OFFLINE DOC]" when nothing was found.
#######################################
get_llm_doc_link() {
    local term="$1"
    local lang="$2"
    local is_import="${3:-false}"

    # Project-local modules have no external documentation to link to.
    if [[ "$term" =~ ^@/ ]] || [[ "$term" =~ ^\./ ]] || [[ "$term" =~ ^app\. ]] \
        || [[ "$term" =~ ^src/ ]] || [[ "$term" =~ from\ \'@/ ]] || [[ "$term" =~ from\ \'\./ ]]; then
        echo "[INTERNAL - SKIP]"
        return
    fi

    local offline_result=""
    if [ "$is_import" = "true" ]; then
        # Import mode prints "<path>|<description>". Keep only the path so the
        # result has the same shape as the non-import branch below.
        offline_result=$("$LOOKUP_SCRIPT" --import "$term" "$lang" 2>/dev/null \
            | grep "^/" | head -1 | cut -d'|' -f1) || true
    else
        offline_result=$("$LOOKUP_SCRIPT" "$term" "$lang" 2>/dev/null \
            | grep "^File:" | head -1 | sed 's/^File: //') || true
    fi

    if [ -n "$offline_result" ]; then
        echo "$offline_result"
    else
        echo "[NO OFFLINE DOC]"
    fi
}
# Generate keywords with doc links
#######################################
# List the most frequent keywords together with their documentation link.
# Globals:
#   RESULTS_DIR (read)  - directory holding grep_keywords.txt
#   TOP_N (read)        - number of leading lines of that file to consider
#   PRIMARY_LANG (read) - language passed to get_llm_doc_link
# Outputs:
#   One "<count> <keyword> → <doc link>" line per keyword on stdout, or
#   "No keywords found" when the results file does not exist.
#######################################
generate_keywords_with_docs() {
    local keywords_file="$RESULTS_DIR/grep_keywords.txt"
    if [ ! -f "$keywords_file" ]; then
        echo "No keywords found"
        return
    fi

    local count keyword doc_link
    # Each line is "<count> <keyword>"; read splits the fields directly, so no
    # per-line awk processes are needed. Lines starting with '#' are comments.
    head -n "$TOP_N" "$keywords_file" | grep -v '^#' | while read -r count keyword _; do
        [ -z "$keyword" ] && continue
        doc_link=$(get_llm_doc_link "$keyword" "$PRIMARY_LANG" "false") || doc_link="[NO OFFLINE DOC]"
        echo "$count $keyword → $doc_link"
    done
}
# Generate functions with doc links
#######################################
# List the most frequently called functions with their documentation link.
# Globals:
#   RESULTS_DIR (read)  - directory holding grep_function_calls.txt
#   TOP_N (read)        - number of leading lines of that file to consider
#   PRIMARY_LANG (read) - language passed to get_llm_doc_link
# Outputs:
#   One "<count> <name>() → <doc link>" line per function on stdout, or
#   "No functions found" when the results file does not exist.
#######################################
generate_functions_with_docs() {
    local functions_file="$RESULTS_DIR/grep_function_calls.txt"
    if [ ! -f "$functions_file" ]; then
        echo "No functions found"
        return
    fi

    local count func doc_link
    head -n "$TOP_N" "$functions_file" | grep -v '^#' | while read -r count func _; do
        # Skip empty names and single-letter names (typical of minified code).
        if [ "${#func}" -le 1 ]; then
            continue
        fi
        doc_link=$(get_llm_doc_link "$func" "$PRIMARY_LANG" "false") || doc_link="[NO OFFLINE DOC]"
        echo "$count $func() → $doc_link"
    done
}
# Generate imports with doc links
#######################################
# List the most frequent import statements with their documentation link.
# Globals:
#   RESULTS_DIR (read)  - directory holding grep_imports.txt
#   PRIMARY_LANG (read) - language passed to get_llm_doc_link
# Outputs:
#   One "<count> <import statement> → <doc link>" line per import on stdout
#   (first 20 lines of the file only), or "No imports found" when the results
#   file does not exist.
#######################################
generate_imports_with_docs() {
    local imports_file="$RESULTS_DIR/grep_imports.txt"
    if [ ! -f "$imports_file" ]; then
        echo "No imports found"
        return
    fi

    local count import_stmt doc_link
    # read puts the first field in count and the remainder, trimmed, in
    # import_stmt. A line holding only a count therefore yields an empty
    # statement and is skipped, and an indented import does not keep leading
    # blanks that would defeat the anchored import patterns used downstream.
    head -n 20 "$imports_file" | grep -v '^#' | while read -r count import_stmt; do
        [ -z "$import_stmt" ] && continue
        # Imports of project-local modules have no external documentation.
        if [[ "$import_stmt" =~ @/ ]] || [[ "$import_stmt" =~ \'\./ ]] \
            || [[ "$import_stmt" =~ from\ app\. ]] || [[ "$import_stmt" =~ from\ src\. ]]; then
            echo "$count $import_stmt → [INTERNAL - SKIP]"
        else
            doc_link=$(get_llm_doc_link "$import_stmt" "$PRIMARY_LANG" "true") || doc_link="[NO OFFLINE DOC]"
            echo "$count $import_stmt → $doc_link"
        fi
    done
}
# Start the LLM prompt file (truncating any previous one) with the fixed
# instructions. The heredoc is unquoted so the example path can point at the
# current user's offline docs directory instead of one author's home
# directory; literal backticks are therefore escaped.
cat > "$LLM_PROMPT_FILE" << PROMPT_HEADER
# LLM Prompt: Generate Anki Flashcards
You are creating Anki flashcards from code analysis.
## CRITICAL INSTRUCTIONS
1. **READ DOCS VIA TERMINAL** - Use the \`cat\` command to read each .md file:
\`\`\`
cat ${OFFLINE_DOCS_DIR:-$HOME/.local/share/offline-docs}/mdn-content/files/en-us/web/javascript/reference/statements/const/index.md
\`\`\`
2. **DO NOT USE YOUR OWN KNOWLEDGE** - Base flashcards ONLY on the content you read from the files
3. **IF YOU CANNOT READ A FILE** - Report: "ERROR: Cannot read [path]" and skip that item
4. **NEVER FALL BACK TO GENERAL KNOWLEDGE** - If you can't read the file, skip it entirely
5. **READ ONE FILE AT A TIME** - Run cat for each topic before creating its flashcards
PROMPT_HEADER
# Append the analysis-specific part of the prompt. This heredoc is unquoted on
# purpose: $PRIMARY_LANG and the three $(generate_*_with_docs) substitutions
# are expanded while the file is written, and literal backticks are escaped.
cat >> "$LLM_PROMPT_FILE" << EOF
## Context
- Primary Language: **$PRIMARY_LANG**
## Top Keywords (by frequency)
Items marked \`[INTERNAL - SKIP]\` are project-specific - skip them.
Items marked \`[NO OFFLINE DOC]\` have no offline documentation - use online docs or skip.
Other items have offline doc paths you can reference.
\`\`\`
$(generate_keywords_with_docs)
\`\`\`
## Top Functions/Methods (by frequency)
\`\`\`
$(generate_functions_with_docs)
\`\`\`
## Top Imports/Includes
\`\`\`
$(generate_imports_with_docs)
\`\`\`
EOF
# Append the fixed card-writing guidelines and output-format rules. The
# delimiter is quoted, so nothing below is expanded by the shell.
# NOTE(review): this text asks for a file to be created and also says
# "Output ONLY the TSV lines", and its deck name and examples are JavaScript
# regardless of $PRIMARY_LANG - confirm that both are intended.
cat >> "$LLM_PROMPT_FILE" << 'PROMPT_FOOTER'
## Guidelines
**CRITICAL - Keep answers EXTREMELY short:**
- Most answers should be **1-2 words** or **1 sentence**
- It's common and expected for an answer to be just: "Returns an array" or "Immutable"
- 2 sentences = longer answer, 3 sentences = absolute maximum (rare)
- Each flashcard tests ONE atomic piece of knowledge
**NO DUPLICATES:**
- Before creating a card, check if you already created a similar question
- Each unique fact should appear in EXACTLY ONE card
- Do NOT create multiple cards asking the same thing with slightly different wording
**What to include:**
- Concept cards: "What is X?" / "What does X do?"
- Syntax cards: "How do you write X?" (brief code snippet)
- Comparison cards: "X vs Y - what's the difference?"
**What to SKIP (do NOT create cards for):**
- MDN frontmatter fields: title, slug, page-type, browser-compat, spec-urls
- YAML metadata between `---` markers at the start of files
- Any line that looks like metadata (key: value at start of doc)
- Empty answers - if you can't find content for the back, skip the card entirely
**FINAL CARD FOR EACH TOPIC (EXCEPTION TO SHORT ANSWER RULE):**
- Add EXACTLY ONE full documentation card per topic (no duplicates!)
- Question: `[Topic] - Full MDN Documentation`
- Answer: Copy the .md file content STARTING AFTER the `---` frontmatter block
- Skip the YAML frontmatter (everything between the first two `---` lines)
- Do NOT create this card twice for the same topic
**Skipped items - please review:**
- Items marked `[INTERNAL - SKIP]` are project-specific utilities - I skipped them
- Items marked `[NO OFFLINE DOC]` are third-party libraries without bundled docs
- If you want flashcards for skipped items, tell me which ones to include
## OUTPUT: CREATE AN ANKI FILE
**CREATE A FILE DIRECTLY** - Do not just output text. Use your file creation tool to create:
**File path:** `~/.local/share/study-materials/anki_generated.txt`
**Format:** Tab-separated values (TSV) with Anki metadata headers:
```
#separator:tab
#deck:CodeStudy::JavaScript
#notetype:CodeCard
#columns:Front Back Tags
What does <code>const</code> declare?Block-scoped variables with immutable bindings.javascript declarations
```
**Required headers at top of file:**
- `#separator:tab` - Specifies tab as delimiter
- `#deck:CodeStudy::[Language]` - Creates deck "CodeStudy" with sub-deck for language (e.g., CodeStudy::JavaScript)
- `#notetype:CodeCard` - Uses custom note type "CodeCard" (Anki will create if doesn't exist)
- `#columns:Front Back Tags` - Column headers (tab-separated)
**Rules:**
- Use ACTUAL `<code>` tags (not escaped &lt;code&gt;)
- Use `<br>` for line breaks within fields
- Use `<pre>` for code blocks
- Tags are space-separated
- Escape any literal tabs within content as spaces
**Example file content:**
```
#separator:tab
#deck:CodeStudy::JavaScript
#notetype:CodeCard
#columns:Front Back Tags
What does <code>const</code> declare?Block-scoped variables with immutable bindings.javascript declarations
Can <code>const</code> be reassigned?No, throws TypeError.javascript declarations
const - Full Documentation<pre>[ENTIRE CONTENT OF const/index.md FILE]</pre>javascript declarations full-doc
```
**After creating the file**, tell the user:
- File created at: ~/.local/share/study-materials/anki_generated.txt
- Import in Anki: File → Import → select the file
- Deck: CodeStudy::[Language], Note type: CodeCard
---
**Important:**
- Process only 5-10 items at a time to maintain quality
- Focus on items with offline documentation paths
- Output ONLY the TSV lines, no extra formatting or markdown
PROMPT_FOOTER
echo -e "${GREEN}Created: $LLM_PROMPT_FILE${NC}"
#==============================================================================
# Summary
#==============================================================================
# The coloured banner needs echo -e for its escape sequences; the plain-text
# remainder is emitted from a single heredoc.
echo ""
echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
echo -e "${GREEN} Study Materials Generated!${NC}"
echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
cat << SUMMARY

Files created:
 📚 Documentation Links: $DOCS_FILE
 🎴 Anki Cards: $ANKI_FILE
 🤖 LLM Prompt: $LLM_PROMPT_FILE

Next steps:
 1. Review documentation_links.md for learning resources
 2. Import anki_cards.txt into Anki (File -> Import)
 3. Use llm_anki_prompt.md with ChatGPT/Claude to generate more cards

Anki import settings:
 - Field separator: Tab
 - Allow HTML: Yes
 - Tags are in last field: Yes
SUMMARY

942
scripts/utils/lookup_docs.sh Executable file
View File

@ -0,0 +1,942 @@
#!/bin/bash
#==============================================================================
# Offline Documentation Lookup
# Searches downloaded documentation for terms
#
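# Usage: ./lookup_docs.sh <term> [language] [--open] [--extract]
#        ./lookup_docs.sh --import "<import statement>" [language]
#        ./lookup_docs.sh --batch <file> [language]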
#
# Examples:
# ./lookup_docs.sh Path python # Find Path in Python docs
# ./lookup_docs.sh vector c_cpp # Find vector in C++ docs
# ./lookup_docs.sh map # Find map in all languages
# ./lookup_docs.sh --batch imports.txt # Lookup multiple terms from file
#==============================================================================
set -e
# Configuration: the docs root can be overridden through OFFLINE_DOCS_DIR.
DOCS_DIR="${OFFLINE_DOCS_DIR:-$HOME/.local/share/offline-docs}"
INDEX_DIR="${DOCS_DIR}/.index"
# Colors: start with none, and enable them only when stdout is a terminal so
# piped or captured output stays free of escape sequences.
RED='' GREEN='' BLUE='' YELLOW='' CYAN='' NC=''
if [ -t 1 ]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    BLUE='\033[0;34m'
    YELLOW='\033[1;33m'
    CYAN='\033[0;36m'
    NC='\033[0m'
fi
#==============================================================================
# Python-specific lookup
#==============================================================================
#######################################
# Find the offline Python documentation location for a term.
# Lookup order: item inside a given module, language keywords, built-in
# constants, module page, built-in function, built-in type, any library page
# with a matching id, and finally the prebuilt index file.
# Globals:
#   DOCS_DIR (read)  - offline docs root; Python docs live in $DOCS_DIR/python
#   INDEX_DIR (read) - directory that may contain python_index.txt
# Arguments:
#   $1 - term to look up
#   $2 - optional module name; when its page exists the term is resolved there
# Outputs:
#   "<path>[#anchor]|<description>" on stdout, or nothing when not found.
# Returns:
#   0 in all cases.
#######################################
lookup_python() {
    local term="$1"
    local in_module="${2:-}"
    local doc_dir="$DOCS_DIR/python"
    local result=""
    local desc=""

    # Keywords are matched case-insensitively; constants use the original case.
    local term_lower
    term_lower=$(printf '%s\n' "$term" | tr '[:upper:]' '[:lower:]')

    # An item imported from a specific module is resolved in that module's page.
    if [ -n "$in_module" ]; then
        local module_lower module_page
        module_lower=$(printf '%s\n' "$in_module" | tr '[:upper:]' '[:lower:]')
        module_page="$doc_dir/library/${module_lower}.html"
        if [ -f "$module_page" ]; then
            # First id attribute containing the term, matched literally so that
            # characters such as '.' or '*' in the term are not regex syntax.
            local module_anchor
            module_anchor=$(grep -o 'id="[^"]*"' "$module_page" 2>/dev/null \
                | grep -F -- "$term" | head -1 | sed 's/id="//;s/"//')
            if [ -n "$module_anchor" ]; then
                echo "$module_page#$module_anchor|Python: $in_module.$term"
            else
                # No anchor found: link to the module page itself.
                echo "$module_page|Python: $term in module $in_module"
            fi
            return 0
        fi
    fi

    #--------------------------------------------------------------------------
    # PRIORITY 1: keywords and constants mapped to fixed page/anchor pairs
    #--------------------------------------------------------------------------
    local page="" anchor=""
    case "$term_lower" in
        # Compound statements
        if|elif|else)
            page="reference/compound_stmts.html"; anchor="if"
            desc="Python: if statement" ;;
        for|while|with)
            page="reference/compound_stmts.html"; anchor="$term_lower"
            desc="Python: $term_lower statement" ;;
        def)
            page="reference/compound_stmts.html"; anchor="def"
            desc="Python: function definition" ;;
        class)
            page="reference/compound_stmts.html"; anchor="class"
            desc="Python: class definition" ;;
        try|except|finally)
            page="reference/compound_stmts.html"; anchor="try"
            desc="Python: try statement" ;;
        async)
            page="reference/compound_stmts.html"; anchor="async"
            desc="Python: async definition" ;;
        match|case)
            page="reference/compound_stmts.html"; anchor="match"
            desc="Python: match statement" ;;
        # Simple statements
        return|pass|break|continue|raise|assert|del|global|nonlocal)
            page="reference/simple_stmts.html"; anchor="$term_lower"
            desc="Python: $term_lower statement" ;;
        import|from)
            page="reference/simple_stmts.html"; anchor="import"
            desc="Python: import statement" ;;
        yield)
            page="reference/simple_stmts.html"; anchor="yield"
            desc="Python: yield expression" ;;
        type)
            page="reference/simple_stmts.html"; anchor="type"
            desc="Python: type alias statement" ;;
        # Expressions and operators
        and|or|not|in|is)
            page="reference/expressions.html"; anchor="$term_lower"
            desc="Python: $term_lower operator" ;;
        lambda|await)
            page="reference/expressions.html"; anchor="$term_lower"
            desc="Python: $term_lower expression" ;;
    esac

    # Built-in constants are case-sensitive, so match the original term.
    if [ -z "$page" ]; then
        case "$term" in
            True|False|None|Ellipsis|NotImplemented)
                page="library/constants.html"; anchor="$term"
                desc="Python: $term constant" ;;
        esac
    fi

    # Only report a keyword/constant hit when its page is actually installed.
    if [ -n "$page" ] && [ -f "$doc_dir/$page" ]; then
        result="$doc_dir/$page#$anchor"
    else
        desc=""
    fi

    #--------------------------------------------------------------------------
    # PRIORITY 2: a module page (pathlib, os, sys, ...)
    #--------------------------------------------------------------------------
    if [ -z "$result" ] && [ -n "$term_lower" ] && [ -f "$doc_dir/library/${term_lower}.html" ]; then
        result="$doc_dir/library/${term_lower}.html"
        desc="Python module: $term"
    fi

    #--------------------------------------------------------------------------
    # PRIORITY 3: built-in functions (library/functions.html)
    #--------------------------------------------------------------------------
    if [ -z "$result" ] && [ -n "$term_lower" ] && [ -f "$doc_dir/library/functions.html" ]; then
        if grep -qF -- "id=\"$term_lower\"" "$doc_dir/library/functions.html" 2>/dev/null; then
            result="$doc_dir/library/functions.html#$term_lower"
            desc="Python built-in function: $term"
        fi
    fi

    #--------------------------------------------------------------------------
    # PRIORITY 4: built-in types (library/stdtypes.html)
    # As with keywords, a hit is only reported when the page exists; otherwise
    # a machine without Python docs would return paths to missing files.
    #--------------------------------------------------------------------------
    if [ -z "$result" ] && [ -f "$doc_dir/library/stdtypes.html" ]; then
        local type_anchor="" type_name=""
        case "$term_lower" in
            str|string)      type_anchor="str";            type_name="str" ;;
            int|integer)     type_anchor="int";            type_name="int" ;;
            float)           type_anchor="float";          type_name="float" ;;
            list)            type_anchor="list";           type_name="list" ;;
            dict|dictionary) type_anchor="dict";           type_name="dict" ;;
            set)             type_anchor="set";            type_name="set" ;;
            tuple)           type_anchor="tuple";          type_name="tuple" ;;
            bool|boolean)    type_anchor="boolean-values"; type_name="bool" ;;
            bytes)           type_anchor="bytes";          type_name="bytes" ;;
        esac
        if [ -n "$type_anchor" ]; then
            result="$doc_dir/library/stdtypes.html#$type_anchor"
            desc="Python: $type_name type"
        fi
    fi

    #--------------------------------------------------------------------------
    # PRIORITY 5: class/function documented in any library page (exact id)
    #--------------------------------------------------------------------------
    if [ -z "$result" ] && [ -n "$term" ]; then
        local found_in
        found_in=$(grep -lF -- "id=\"$term\"" "$doc_dir/library/"*.html 2>/dev/null | head -1)
        if [ -n "$found_in" ]; then
            result="$found_in#$term"
            local module
            module=$(basename "$found_in" .html)
            desc="Python: $term in module $module"
        fi
    fi

    #--------------------------------------------------------------------------
    # PRIORITY 6: prebuilt index ("<term> <location>" per line)
    #--------------------------------------------------------------------------
    if [ -z "$result" ] && [ -f "$INDEX_DIR/python_index.txt" ]; then
        local index_match
        index_match=$(grep -i "^$term " "$INDEX_DIR/python_index.txt" 2>/dev/null | head -1)
        if [ -n "$index_match" ]; then
            result=$(echo "$index_match" | cut -d' ' -f2-)
            desc="Python: $term (from index)"
        fi
    fi

    # Deliberately no full-text search fallback: it produces garbage results.
    # An empty output tells the caller nothing specific was found.
    if [ -n "$result" ]; then
        echo "$result|$desc"
    fi
}
#==============================================================================
# C/C++ specific lookup
#==============================================================================
# Locate offline C/C++ documentation for a term.
# Known headers, containers, keywords and types map to a list of candidate
# pages below $DOCS_DIR/c_cpp; when several candidates exist the last one
# listed wins. Anything else falls back to a file-name search.
# Prints "<path>|<description>" on stdout, or nothing when no page is found.
lookup_cpp() {
    local term="$1"
    local doc_dir="$DOCS_DIR/c_cpp"
    local result=""
    local desc=""
    local -a candidates=()

    # The system package installs below c_cpp/system (a symlink).
    if [ -L "$doc_dir/system" ]; then
        doc_dir="$doc_dir/system"
    fi

    case "$term" in
        # Common C headers
        stdio.h|stdio)
            candidates=("reference/cstdio/index.html" "en/c/io.html")
            desc="C standard I/O header"
            ;;
        stdlib.h|stdlib)
            candidates=("reference/cstdlib/index.html" "en/c/memory.html")
            desc="C standard library header"
            ;;
        string.h|cstring)
            candidates=("reference/cstring/index.html")
            desc="C string handling header"
            ;;
        math.h|cmath)
            candidates=("reference/cmath/index.html")
            desc="C math header"
            ;;
        # C++ STL containers and headers
        vector)
            candidates=("reference/vector/index.html" "en/cpp/container/vector.html")
            desc="C++ std::vector container"
            ;;
        map)
            candidates=("reference/map/index.html")
            desc="C++ std::map container"
            ;;
        string)
            candidates=("reference/string/index.html")
            desc="C++ std::string"
            ;;
        iostream)
            candidates=("reference/iostream/index.html")
            desc="C++ iostream header"
            ;;
        # Keywords and fundamental types
        if|else|for|while|do|switch|case|break|continue|return|goto)
            candidates=("en/c/language/$term.html" "en/cpp/language/$term.html")
            desc="C/C++ keyword: $term"
            ;;
        int|char|float|double|void|long|short|unsigned|signed)
            candidates=("en/c/language/type.html")
            desc="C/C++ type: $term"
            ;;
        struct|union|enum|typedef)
            candidates=("en/c/language/$term.html")
            desc="C/C++ keyword: $term"
            ;;
    esac

    # Later candidates override earlier ones when both are installed.
    local rel
    for rel in "${candidates[@]}"; do
        if [ -f "$doc_dir/$rel" ]; then
            result="$doc_dir/$rel"
        fi
    done

    # Fall back to a file-name search (-L follows the system symlink).
    if [ -z "$result" ]; then
        local hit
        hit=$(find -L "$doc_dir" -name "*${term}*" -type f 2>/dev/null | head -1)
        if [ -n "$hit" ]; then
            result="$hit"
            desc="C/C++: $term"
        fi
    fi

    if [ -n "$result" ]; then
        echo "$result|$desc"
    fi
}
#==============================================================================
# JavaScript/MDN specific lookup
# Searches the cloned MDN content repository
#==============================================================================
# Print "<dir>/index.md|<title>" when <dir> holds an MDN page.
# $1 - candidate page directory (may be empty), $2 - title used when the page
# front matter has no "title:" line. Returns 0 if a line was printed, else 1.
_lookup_js_emit() {
    local page_dir="$1"
    local fallback_title="$2"
    if [ -z "$page_dir" ] || [ ! -f "$page_dir/index.md" ]; then
        return 1
    fi
    local title
    title=$(grep -m1 "^title:" "$page_dir/index.md" 2>/dev/null | sed 's/^title:\s*//' | tr -d '"')
    echo "$page_dir/index.md|${title:-$fallback_title}"
}

# Locate a term in the cloned MDN content repository below
# $DOCS_DIR/mdn-content. Search order: statement aliases, literals, the
# JavaScript reference sections, Web APIs, partial matches among global
# objects, and finally the glossary.
# Prints "<path to index.md>|<title>" and returns 0 on a hit; returns 1
# without output otherwise.
lookup_js() {
    local term="$1"
    local mdn_dir="$DOCS_DIR/mdn-content/files/en-us"
    local js_ref="$mdn_dir/web/javascript/reference"
    local api_root="$mdn_dir/web/api"
    local found

    # Normalize term for searching
    local term_lower
    term_lower=$(echo "$term" | tr '[:upper:]' '[:lower:]')

    # MDN files several statements under combined names (if...else, ...).
    local stmt_page=""
    case "$term_lower" in
        if|else)           stmt_page="if...else" ;;
        try|catch|finally) stmt_page="try...catch" ;;
        do)                stmt_page="do...while" ;;
        while|for|switch)  stmt_page="$term_lower" ;;
        case|default)      stmt_page="switch" ;;
    esac
    if [ -n "$stmt_page" ]; then
        if _lookup_js_emit "$js_ref/statements/$stmt_page" "$term"; then
            return 0
        fi
    fi

    # Boolean, null and undefined literals
    case "$term_lower" in
        true|false)
            if [ -f "$js_ref/global_objects/boolean/index.md" ]; then
                echo "$js_ref/global_objects/boolean/index.md|Boolean ($term)"
                return 0
            fi
            ;;
        null)
            if _lookup_js_emit "$js_ref/operators/null" "null"; then
                return 0
            fi
            ;;
        undefined)
            if _lookup_js_emit "$js_ref/global_objects/undefined" "undefined"; then
                return 0
            fi
            ;;
    esac

    # JavaScript reference sections in priority order; MDN keeps each page in
    # a directory named after the item, holding an index.md.
    local section
    for section in statements operators global_objects functions classes; do
        [ -d "$js_ref/$section" ] || continue
        found=$(find "$js_ref/$section" -maxdepth 2 -type d -iname "$term" 2>/dev/null | head -1)
        if _lookup_js_emit "$found" "$term"; then
            return 0
        fi
    done

    # Web APIs
    if [ -d "$api_root" ]; then
        # <term>_api overview directory (e.g., fetch_api, console_api)
        if _lookup_js_emit "$api_root/${term_lower}_api" "$term API"; then
            return 0
        fi
        # Top-level interface (e.g., Console, Document, Element)
        found=$(find "$api_root" -maxdepth 1 -type d -iname "$term" 2>/dev/null | head -1)
        if _lookup_js_emit "$found" "$term"; then
            return 0
        fi
        # Global functions documented under window/ (alert, confirm, ...)
        if _lookup_js_emit "$api_root/window/${term_lower}" "Window.$term()"; then
            return 0
        fi
        # Methods nested below an interface
        found=$(find "$api_root" -maxdepth 3 -type d -iname "$term" 2>/dev/null | head -1)
        if _lookup_js_emit "$found" "$term"; then
            return 0
        fi
    fi

    # Partial matches among global objects (e.g., Array.from, Object.keys)
    if [ -d "$js_ref/global_objects" ]; then
        found=$(find "$js_ref/global_objects" -maxdepth 2 -type d -iname "*${term}*" 2>/dev/null | head -1)
        if _lookup_js_emit "$found" "$term"; then
            return 0
        fi
    fi

    # Glossary as last resort
    if [ -d "$mdn_dir/glossary" ]; then
        found=$(find "$mdn_dir/glossary" -maxdepth 1 -type d -iname "$term" 2>/dev/null | head -1)
        if _lookup_js_emit "$found" "$term"; then
            return 0
        fi
    fi

    return 1
}
#==============================================================================
# Rust specific lookup
#==============================================================================
# Search the locally installed Rust std documentation for a term.
# The docs root is the directory containing the file that `rustup doc --path`
# reports. The first file below its std/ directory whose name contains the
# term is printed as "<path>|Rust: <term>". Prints nothing when rustup or the
# std docs are unavailable.
lookup_rust() {
    local term="$1"

    if ! command -v rustup &>/dev/null; then
        return 0
    fi

    # Use rustup doc to get path
    local docs_root
    docs_root=$(rustup doc --path 2>/dev/null | head -1 | xargs dirname 2>/dev/null)
    if [ ! -d "$docs_root/std" ]; then
        return 0
    fi

    # Search in std docs
    local hit
    hit=$(find "$docs_root/std" -name "*${term}*" -type f 2>/dev/null | head -1)
    if [ -n "$hit" ]; then
        echo "$hit|Rust: $term"
    fi
}
#==============================================================================
# Go specific lookup
#==============================================================================
# Check whether `go doc` knows the term.
# Go documentation is not stored as files here, so on success the "path" part
# of the output is the command to run: "go doc <term>|<description>".
# Prints nothing when the go tool is missing or `go doc` fails for the term.
lookup_go() {
    local term="$1"

    if command -v go &>/dev/null && go doc "$term" &>/dev/null; then
        echo "go doc $term|Go package: $term (use 'go doc $term' to view)"
    fi
}
#==============================================================================
# Shell specific lookup
#==============================================================================
#######################################
# Find documentation for a shell builtin or command.
# Checks, in order: the bash builtins dump, the common-commands list, and the
# system manual pages.
# Globals:
#   DOCS_DIR (read) - offline docs root; shell docs live in $DOCS_DIR/shell
# Arguments:
#   $1 - builtin or command name
# Outputs:
#   "<path or man command>|<description>" on stdout, or nothing if not found.
# Returns:
#   0 in all cases.
#######################################
lookup_shell() {
    local term="$1"
    local doc_dir="$DOCS_DIR/shell"
    local result=""
    local desc=""

    # An empty name would match every line of the lists below.
    if [ -z "$term" ]; then
        return 0
    fi

    # Builtins are delimited by "=== <name> ===" headings; match literally.
    if [ -f "$doc_dir/bash_builtins.txt" ] \
        && grep -qF -- "=== $term ===" "$doc_dir/bash_builtins.txt" 2>/dev/null; then
        result="$doc_dir/bash_builtins.txt"
        desc="Bash builtin: $term"
    fi

    # Common commands: the line must start with the whole name. A plain prefix
    # match would report the "lsblk" line for "ls", so the character after the
    # name must not be one that can continue a command name.
    if [ -z "$result" ] && [ -f "$doc_dir/common_commands.txt" ]; then
        local cmd_desc
        cmd_desc=$(LOOKUP_TERM="$term" awk '
            BEGIN { t = ENVIRON["LOOKUP_TERM"] }
            index($0, t) == 1 {
                next_char = substr($0, length(t) + 1, 1)
                if (next_char == "" || next_char !~ /[A-Za-z0-9_.+-]/) {
                    print
                    exit
                }
            }
        ' "$doc_dir/common_commands.txt" 2>/dev/null) || cmd_desc=""
        if [ -n "$cmd_desc" ]; then
            result="$doc_dir/common_commands.txt"
            desc="Shell command: $cmd_desc"
        fi
    fi

    # Manual page: `man -w` fails when there is none, which is not an error
    # for this lookup, so its status is absorbed.
    if [ -z "$result" ] && command -v man >/dev/null 2>&1; then
        local man_path
        man_path=$(man -w -- "$term" 2>/dev/null) || man_path=""
        if [ -n "$man_path" ]; then
            result="man $term"
            desc="Manual page: $term (use 'man $term' to view)"
        fi
    fi

    if [ -n "$result" ]; then
        echo "$result|$desc"
    fi
}
#==============================================================================
# Generic lookup (searches all languages)
#==============================================================================
#######################################
# Look a term up in every supported language.
# Arguments:
#   $1 - term to look up
# Outputs:
#   One "<lang>: <path>|<description>" line on stdout per language with a hit.
# Returns:
#   0 in all cases.
#######################################
lookup_all() {
    local term="$1"
    local lang result

    for lang in python cpp js rust go shell; do
        # A lookup may return non-zero when it finds nothing (lookup_js does).
        # Under `set -e` that status would abort the script before the
        # remaining languages are tried, so it is absorbed here.
        result=$("lookup_$lang" "$term" 2>/dev/null) || true
        if [ -n "$result" ]; then
            echo "$lang: $result"
        fi
    done
}
#==============================================================================
# Parse Python import and lookup the actual imported item
#==============================================================================
#######################################
# Split a Python import statement into module and first imported item.
# Arguments:
#   $1 - import line ("from X import Y[, Z...]" or "import X[, ...]");
#        leading whitespace is allowed
# Outputs:
#   "<module>|<item>" on stdout; <item> is empty for plain "import X".
# Returns:
#   0 when the line was recognised, 1 otherwise (no output).
#######################################
parse_python_import() {
    local import_line="$1"
    local from_re='^[[:space:]]*from[[:space:]]+([^[:space:]]+)[[:space:]]+import[[:space:]]+(.+)'
    local import_re='^[[:space:]]*import[[:space:]]+([^[:space:],]+)'

    # Handle "from X import Y" format
    if [[ "$import_line" =~ $from_re ]]; then
        local module="${BASH_REMATCH[1]}"
        local items="${BASH_REMATCH[2]}"
        # Turn parentheses and commas into blanks rather than deleting them,
        # so "a,b" yields "a" instead of "ab". Then keep the first word.
        items=${items//[(),]/ }
        read -r items _ <<< "$items" || true
        echo "$module|$items"
        return 0
    fi

    # Handle "import X" format
    if [[ "$import_line" =~ $import_re ]]; then
        echo "${BASH_REMATCH[1]}|"
        return 0
    fi

    return 1
}
#==============================================================================
# Smart lookup for imports
#==============================================================================
#######################################
# Resolve an import/include statement to its offline documentation.
# Arguments:
#   $1 - the import statement as it appears in source code
#   $2 - language: python; cpp, c_cpp or c; js, javascript, typescript, ts,
#        jsx or tsx. Both the raw names and the short names that main
#        normalises to are accepted.
# Outputs:
#   "<path>|<description>" on stdout, or nothing when nothing was found.
#   An unsupported language is reported on stderr.
# Returns:
#   0 in all cases, so a miss cannot abort a caller running under `set -e`.
#######################################
lookup_import() {
    local import_line="$1"
    local lang="$2"
    local result=""

    case "$lang" in
        python)
            local parsed module item
            parsed=$(parse_python_import "$import_line") || parsed=""
            if [ -z "$parsed" ]; then
                return 0
            fi
            module=${parsed%%|*}
            item=${parsed#*|}
            # For "from X import Y", look up Y within module X's documentation
            if [ -n "$item" ] && [ -n "$module" ]; then
                result=$(lookup_python "$item" "$module") || result=""
                if [ -n "$result" ]; then
                    echo "$result"
                    return 0
                fi
            fi
            # Fall back to module documentation
            lookup_python "$module" || true
            ;;
        cpp|c_cpp|c)
            # Extract header name from #include <header> or #include "header";
            # a line that does not look like an include is used as is.
            local header="$import_line"
            local include_re='#include[[:space:]]*[<"]([^">]+)[">]'
            if [[ "$import_line" =~ $include_re ]]; then
                header="${BASH_REMATCH[1]}"
            fi
            # Try the header name unchanged first: lookup_cpp has dedicated
            # entries for names such as string.h and math.h that can never be
            # reached once the .h suffix is removed. Then retry without it.
            result=$(lookup_cpp "$header") || result=""
            if [ -z "$result" ] && [[ "$header" == *.h ]]; then
                result=$(lookup_cpp "${header%.h}") || result=""
            fi
            if [ -n "$result" ]; then
                echo "$result"
            fi
            ;;
        js|javascript|typescript|ts|jsx|tsx)
            # Extract the module specifier from: from "module" / from 'module',
            # or require("module") / require('module').
            local module=""
            local from_re="from[[:space:]]+['\"]([^'\"]+)"
            local require_re="require\\(['\"]([^'\"]+)"
            if [[ "$import_line" =~ $from_re ]]; then
                module="${BASH_REMATCH[1]}"
            elif [[ "$import_line" =~ $require_re ]]; then
                module="${BASH_REMATCH[1]}"
            fi
            if [ -n "$module" ]; then
                lookup_js "$module" || true
            fi
            ;;
        *)
            echo "Unknown language: $lang" >&2
            ;;
    esac
    return 0
}
#==============================================================================
# Extract documentation content
#==============================================================================
# Print the part of a documentation file that mentions a term.
# $1 - file to read, $2 - term to search for (case-insensitive, except in JSON
# files), $3 - maximum number of lines to print (default 20).
# HTML is converted to text with html2text or lynx when available, otherwise
# tags are removed with sed. For JSON files the quoted key and the five lines
# after each match are printed, without the line limit.
extract_doc_content() {
    local file="$1"
    local term="$2"
    local max_lines="${3:-20}"

    case "$file" in
        *.html)
            # Pick one HTML-to-text converter, then find the section about term
            {
                if command -v html2text &>/dev/null; then
                    html2text "$file" 2>/dev/null
                elif command -v lynx &>/dev/null; then
                    lynx -dump -nolist "$file" 2>/dev/null
                else
                    # Basic extraction
                    sed 's/<[^>]*>//g' "$file"
                fi
            } | grep -A"$max_lines" -i "$term" | head -"$max_lines"
            ;;
        *.json)
            grep -A5 "\"$term\"" "$file" 2>/dev/null
            ;;
        *)
            # Plain text
            grep -A"$max_lines" -i "$term" "$file" | head -"$max_lines"
            ;;
    esac
}
#==============================================================================
# Main
#==============================================================================
# Print the command-line help on stdout, using the name the script was
# invoked with in the synopsis and examples.
usage() {
    local self="$0"
    printf '%s\n' \
        "Usage: $self <term> [language] [options]" \
        "Search offline documentation for a term." \
        "Languages: python, cpp, c_cpp, js, javascript, rust, go, shell, all" \
        "Options:" \
        "--open Open the documentation file (requires xdg-open)" \
        "--extract Extract and display relevant content" \
        "--import Parse and lookup an import statement" \
        "--batch Process multiple terms from a file" \
        "Examples:" \
        "$self Path python # Find Path in Python docs" \
        "$self vector cpp # Find vector in C++ docs" \
        "$self map # Find map in all languages" \
        "$self --import \"from pathlib import Path\" python" \
        "$self --batch imports.txt python"
}
#######################################
# Command-line entry point: parse arguments and run a lookup.
# Arguments:
#   <term> [language] [--open] [--extract]
#   --import "<import statement>" [language]
#   --batch <file> [language]
#   Language names and options may appear in any order. When every
#   positional word is a language name (for example "all python"), the
#   first one is taken as the search term.
# Outputs:
#   Lookup results or a "Not found" message on stdout; usage errors on stderr.
# Returns:
#   Exits 1 for a missing option value or batch file, otherwise 0.
#######################################
main() {
    if [ $# -eq 0 ]; then
        usage
        exit 0
    fi

    local term=""
    local lang=""
    local action="lookup"
    local open_file=false
    local extract=false
    local -a lang_words=()

    while [ $# -gt 0 ]; do
        case "$1" in
            --open)
                open_file=true
                shift
                ;;
            --extract)
                extract=true
                shift
                ;;
            --import|--batch)
                # Without this check the second shift fails and, under
                # `set -e`, ends the script without any message.
                if [ $# -lt 2 ]; then
                    echo "Error: $1 requires an argument" >&2
                    exit 1
                fi
                action="${1#--}"
                term="$2" # import statement, or the batch file
                shift 2
                ;;
            --help|-h)
                usage
                exit 0
                ;;
            python|cpp|c_cpp|c|js|javascript|ts|typescript|tsx|jsx|rust|go|shell|bash|all)
                lang_words+=("$1")
                shift
                ;;
            *)
                if [ -z "$term" ]; then
                    term="$1"
                fi
                shift
                ;;
        esac
    done

    # A term that happens to be a language name (Python's all(), a Go
    # package called "go", ...) would otherwise be consumed as the language.
    if [ -z "$term" ] && [ "${#lang_words[@]}" -gt 0 ]; then
        term="${lang_words[0]}"
        lang_words=("${lang_words[@]:1}")
    fi
    # As before, the last language given wins.
    if [ "${#lang_words[@]}" -gt 0 ]; then
        lang="${lang_words[${#lang_words[@]}-1]}"
    fi

    # Normalize language to the suffix of the lookup_<lang> functions.
    case "$lang" in
        c|c_cpp) lang="cpp" ;;
        javascript|js|typescript|ts|jsx|tsx) lang="js" ;;
        bash) lang="shell" ;;
        "") lang="all" ;;
    esac

    # lookup_import is given the long language names, which it has always
    # accepted.
    local import_lang="$lang"
    case "$lang" in
        cpp) import_lang="c_cpp" ;;
        js) import_lang="javascript" ;;
    esac

    local result=""
    case "$action" in
        lookup)
            if [ "$lang" = "all" ]; then
                lookup_all "$term"
            else
                # A miss may be reported through a non-zero status; absorb it
                # so that "Not found" is printed instead of `set -e` ending
                # the script.
                result=$("lookup_$lang" "$term" 2>/dev/null) || true
                if [ -n "$result" ]; then
                    local file desc target
                    file=${result%%|*}
                    desc=${result#*|}
                    echo -e "${GREEN}Found:${NC} $desc"
                    echo -e "${BLUE}File:${NC} $file"
                    # Results may carry a "#anchor" suffix, which is not part
                    # of the file name on disk.
                    target="$file"
                    if [ ! -f "$target" ]; then
                        target="${file%%#*}"
                    fi
                    if [ "$extract" = true ]; then
                        echo ""
                        echo -e "${YELLOW}--- Content ---${NC}"
                        extract_doc_content "$target" "$term" || true
                    fi
                    if [ "$open_file" = true ] && [ -f "$target" ]; then
                        xdg-open "$target" 2>/dev/null &
                    fi
                else
                    echo -e "${RED}Not found:${NC} $term in $lang documentation"
                fi
            fi
            ;;
        import)
            result=$(lookup_import "$term" "$import_lang") || true
            if [ -n "$result" ]; then
                echo -e "${GREEN}Import lookup:${NC} $term"
                echo "$result"
            else
                echo -e "${RED}Could not parse import:${NC} $term"
            fi
            ;;
        batch)
            if [ ! -f "$term" ]; then
                echo "File not found: $term"
                exit 1
            fi
            local line
            while IFS= read -r line || [ -n "$line" ]; do
                [ -z "$line" ] && continue
                [[ "$line" =~ ^# ]] && continue
                echo -e "${CYAN}Looking up:${NC} $line"
                # One unresolvable line must not stop the rest of the batch.
                lookup_import "$line" "$import_lang" || true
                echo ""
            done < "$term"
            ;;
    esac
}
# Script entry point: pass all command-line arguments through to main.
main "$@"

364
scripts/utils/repo_to_study.sh Executable file
View File

@ -0,0 +1,364 @@
#!/usr/bin/env bash
#==============================================================================
# repo_to_study.sh - Complete pipeline: Repo → Analysis → Offline Docs → Study Materials
#
# Usage:
# repo_to_study.sh <repo_url_or_path> [output_dir]
#
# Examples:
# repo_to_study.sh https://github.com/user/repo
# repo_to_study.sh /path/to/local/repo
# repo_to_study.sh .
#
# Output:
# Creates study materials in ~/.local/share/study-materials/<repo_name>/
# - documentation_links.md (with offline doc paths)
# - anki_cards.txt (importable to Anki)
# - llm_anki_prompt.md (for generating more cards with AI)
#==============================================================================
set -euo pipefail
# Script directory for finding other tools
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ANALYZE_SCRIPT="$SCRIPT_DIR/analyze_repo.sh"
STUDY_SCRIPT="$SCRIPT_DIR/generate_study_materials.sh"
SETUP_DOCS_SCRIPT="$SCRIPT_DIR/setup_offline_docs.sh"
# Default output location (not in script dir, user's data dir)
STUDY_MATERIALS_BASE="$HOME/.local/share/study-materials"
# Work directories
# mktemp creates a private directory with an unpredictable name; a fixed
# /tmp/<name>_$$ path can be pre-created or symlinked by another local user.
WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/repo_study_XXXXXX")"
OUTPUT_DIR=""
# Colors (ANSI escapes, expanded when printed by the helpers below)
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'
#==============================================================================
# Helper Functions (all print to stderr to not interfere with return values)
#==============================================================================
# Print a three-line banner around the title $1 on stderr, with a blank
# line before and after it.
print_header() {
    local rule="${BOLD}${CYAN}════════════════════════════════════════════════════════════${NC}"
    printf '%b\n' "\n${rule}" "${BOLD}${CYAN} $1${NC}" "${rule}\n" >&2
}
# Print the step message $1 in bold blue on stderr.
print_step() {
    printf '%b\n' "${BOLD}${BLUE}$1${NC}" >&2
}
# Print the success message $1 in green on stderr.
print_success() {
    printf '%b\n' "${GREEN}$1${NC}" >&2
}
# Print the error message $1 in red on stderr.
print_error() {
    printf '%b\n' "${RED}$1${NC}" >&2
}
# Print the informational message $1 in yellow on stderr.
print_info() {
    printf '%b\n' "${YELLOW}$1${NC}" >&2
}
# Delete the scratch directory named by WORK_DIR. Does nothing when it is
# not a directory or when it is "/". Registered below to run on every exit.
cleanup() {
    [ -d "$WORK_DIR" ] || return 0
    [ "$WORK_DIR" != "/" ] || return 0
    rm -rf "$WORK_DIR"
}
trap cleanup EXIT
# Print the help text on stdout and exit 0. The program name shown is the
# basename of $0.
usage() {
    local prog
    # A failing basename yields an empty name rather than aborting the help
    prog=$(basename "$0") || true
    cat << EOF
repo_to_study.sh - Generate study materials from any repository
USAGE:
${prog} <repo_url_or_path> [output_dir]
ARGUMENTS:
repo_url_or_path Git URL (https/ssh) or local path to repository
output_dir Optional: where to save results
Default: ~/.local/share/study-materials/<repo_name>/
EXAMPLES:
${prog} https://github.com/python/cpython
${prog} git@github.com:torvalds/linux.git
${prog} /home/user/my-project
${prog} . ~/notes/my_study_notes
OUTPUT FILES:
documentation_links.md - Markdown with offline documentation links
anki_cards.txt - Tab-separated file for Anki import
llm_anki_prompt.md - Prompt template for AI-generated cards
analysis/ - Raw analysis data (imports, keywords, functions)
EOF
    exit 0
}
#==============================================================================
# Check Dependencies
#==============================================================================
# Verify that the helper scripts are executable and the basic command-line
# tools are on PATH. On any miss, lists every missing item on stderr and
# exits 1; returns normally when everything is available.
check_dependencies() {
    local missing=()
    local cmd dep

    # Check for required scripts (the test is -x: present AND executable)
    if [ ! -x "$ANALYZE_SCRIPT" ]; then
        missing+=("analyze_repo.sh not found or not executable at $ANALYZE_SCRIPT")
    fi
    if [ ! -x "$STUDY_SCRIPT" ]; then
        missing+=("generate_study_materials.sh not found or not executable at $STUDY_SCRIPT")
    fi

    # Check for basic tools
    for cmd in git curl grep sed awk; do
        if ! command -v "$cmd" &>/dev/null; then
            missing+=("$cmd")
        fi
    done

    if [ ${#missing[@]} -gt 0 ]; then
        print_error "Missing dependencies:"
        # Same stream as the header above, so the report stays together
        for dep in "${missing[@]}"; do
            echo " - $dep" >&2
        done
        exit 1
    fi
}
#==============================================================================
# Ensure Offline Docs are Available
#==============================================================================
# Make sure the offline Python documentation exists, running the setup
# script when it is missing. Offline docs are optional: a missing or failing
# setup script only produces a notice and never stops the pipeline.
ensure_offline_docs() {
    # Same location rule as setup_offline_docs.sh (OFFLINE_DOCS_DIR override)
    local docs_dir="${OFFLINE_DOCS_DIR:-$HOME/.local/share/offline-docs}"

    if [ -d "$docs_dir/python" ]; then
        return 0
    fi

    print_info "Offline docs not found. Setting up Python documentation..."
    if [ -x "$SETUP_DOCS_SCRIPT" ]; then
        if ! "$SETUP_DOCS_SCRIPT" --python; then
            print_info "Offline docs setup failed; continuing without offline documentation"
        fi
    else
        print_info "Run setup_offline_docs.sh --all to enable offline documentation"
    fi
}
# Global to store repo name for cloned repos
REPO_NAME=""
#==============================================================================
# Get Repository
#==============================================================================
# Resolve $1 to a local repository directory and print its path on stdout.
#   - http(s)://, ssh://, git:// and git@host:path inputs are shallow-cloned
#     into $WORK_DIR/<name>, where <name> is the URL basename without ".git".
#   - An existing directory is converted to an absolute path.
# All progress output goes to stderr so callers can capture the path.
# Sets REPO_NAME (lost when called inside a command substitution).
# Exits 1 when the clone fails or the input is neither a URL nor a directory.
get_repo() {
    local input="$1"
    local repo_dir=""

    # Check if it's a URL (git clone needed)
    if [[ "$input" =~ ^(https?|ssh|git):// ]] || [[ "$input" =~ ^git@ ]]; then
        print_step "Cloning repository..."
        # Extract repo name from URL
        REPO_NAME=$(basename "$input" .git)
        repo_dir="$WORK_DIR/$REPO_NAME"
        mkdir -p "$WORK_DIR"
        # git's stdout is sent to stderr; its stderr is already there
        if git clone --depth 1 -- "$input" "$repo_dir" >&2; then
            print_success "Cloned: $input"
        else
            print_error "Failed to clone repository"
            exit 1
        fi
        echo "$repo_dir"
    # Local path
    elif [ -d "$input" ]; then
        # Convert to absolute path
        repo_dir="$(cd "$input" && pwd)"
        REPO_NAME=$(basename "$repo_dir")
        print_success "Using local repository: $repo_dir"
        echo "$repo_dir"
    else
        print_error "Invalid input: '$input' is not a valid URL or directory"
        exit 1
    fi
}
#==============================================================================
# Analyze Repository
#==============================================================================
# Run the analyzer on the repository at $1 and print the results directory
# on stdout. Analyzer output is redirected to stderr. A non-zero analyzer
# status is reported but not fatal; the results directory decides success.
# Exits 1 when no results directory containing per_language/ is found.
analyze_repo() {
    local repo_path="$1"
    local repo_name="$REPO_NAME"
    local rc=0
    local results_dir

    if [ -z "$repo_name" ]; then
        repo_name=$(basename "$repo_path")
    fi

    print_step "Analyzing repository..."
    # Run the analyzer (it outputs to stderr/stdout, results go to /tmp/repo_analysis/)
    "$ANALYZE_SCRIPT" "$repo_path" >&2 || rc=$?
    if [ "$rc" -ne 0 ]; then
        print_info "Analyzer exited with status $rc; checking for results anyway"
    fi

    # Find the results directory
    results_dir="/tmp/repo_analysis/results_${repo_name}"
    if [ ! -d "$results_dir" ]; then
        # Try without prefix
        # NOTE(review): this shared fallback may hold results from an earlier
        # run on another repository — confirm how the analyzer names its output.
        results_dir="/tmp/repo_analysis/results"
    fi
    if [ ! -d "$results_dir" ] || [ ! -d "$results_dir/per_language" ]; then
        print_error "Could not find analysis results at $results_dir"
        exit 1
    fi

    print_success "Analysis complete: $results_dir"
    echo "$results_dir"
}
#==============================================================================
# Generate Study Materials
#==============================================================================
# Run the study-material generator inside the analysis directory $1 and
# copy its output plus the raw analysis data into the output directory $2.
# The caller's working directory is left unchanged, and a relative $2 is
# resolved against it. Returns 1 when $1 is not a directory.
generate_materials() {
    local analysis_dir="$1"
    local output_dir="$2"
    local name

    print_step "Generating study materials with offline documentation..."

    if [ ! -d "$analysis_dir" ]; then
        print_error "Analysis directory not found: $analysis_dir"
        return 1
    fi

    # Pin a relative output path to the caller's directory
    case "$output_dir" in
        /*) ;;
        *) output_dir="$PWD/$output_dir" ;;
    esac

    # Run study materials generator from inside the analysis directory; the
    # subshell keeps the cd from leaking into the rest of the script.
    if (cd "$analysis_dir" && "$STUDY_SCRIPT" . 2>/dev/null) \
        | grep -E "^(Created|✓|Files created)" | head -5; then
        print_success "Study materials generated"
    fi
    # No match above is not treated as failure: the generator may still have
    # written its files, so the copy step below decides what is delivered.

    # Create output directory and copy results
    mkdir -p "$output_dir/analysis"

    # Copy generated files
    for name in documentation_links.md anki_cards.txt llm_anki_prompt.md; do
        if [ -f "$analysis_dir/$name" ]; then
            cp "$analysis_dir/$name" "$output_dir/"
        fi
    done

    # Copy analysis data
    if [ -d "$analysis_dir/per_language" ]; then
        cp -r "$analysis_dir/per_language" "$output_dir/analysis/"
    fi
    for name in grep_imports.txt grep_keywords.txt grep_function_calls.txt; do
        if [ -f "$analysis_dir/$name" ]; then
            cp "$analysis_dir/$name" "$output_dir/analysis/"
        fi
    done

    print_success "Files saved to: $output_dir"
}
#==============================================================================
# Show Summary
#==============================================================================
# Print a human-readable summary of what was written to the output
# directory $1: one entry per generated file that exists, a short preview of
# table rows from documentation_links.md, and the suggested next steps.
show_summary() {
    local output_dir="$1"
    local doc_lines card_count

    print_header "Study Materials Ready!"
    echo -e "${BOLD}Output directory:${NC} $output_dir"
    echo ""
    echo -e "${BOLD}Generated files:${NC}"

    if [ -f "$output_dir/documentation_links.md" ]; then
        doc_lines=$(wc -l < "$output_dir/documentation_links.md")
        echo -e " 📚 ${GREEN}documentation_links.md${NC} ($doc_lines lines)"
        echo " Contains links to OFFLINE documentation"
    fi

    if [ -f "$output_dir/anki_cards.txt" ]; then
        # grep -c prints "0" AND exits 1 when nothing matches, so the status
        # is ignored rather than appending a second "0" to the captured text.
        card_count=$(grep -c '^\w' "$output_dir/anki_cards.txt" 2>/dev/null || true)
        card_count=${card_count:-0}
        echo -e " 🎴 ${GREEN}anki_cards.txt${NC} (~$card_count cards)"
        echo " Import to Anki: File → Import → Tab separated"
    fi

    if [ -f "$output_dir/llm_anki_prompt.md" ]; then
        echo -e " 🤖 ${GREEN}llm_anki_prompt.md${NC}"
        echo " Use with ChatGPT/Claude to generate more cards"
    fi

    if [ -d "$output_dir/analysis" ]; then
        echo -e " 📊 ${GREEN}analysis/${NC}"
        echo " Raw analysis data (imports, keywords, functions per language)"
    fi

    echo ""
    echo -e "${BOLD}Quick preview of imports with offline docs:${NC}"
    if [ -f "$output_dir/documentation_links.md" ]; then
        # '^| `' matches table rows whose first cell starts with a backtick.
        # (In a basic regex "\|" is GNU alternation, which matched every line.)
        # The preview is cosmetic, so "no rows" must not abort the script
        # under set -e / pipefail.
        grep -A20 "import/from" "$output_dir/documentation_links.md" 2>/dev/null \
            | grep '^| `' | head -5 \
            | sed 's/|/│/g' || true
    fi

    echo ""
    echo -e "${BOLD}Next steps:${NC}"
    echo " 1. Open documentation_links.md to browse offline docs"
    echo " 2. Import anki_cards.txt into Anki for spaced repetition"
    echo " 3. Use llm_anki_prompt.md to generate more targeted cards"
    echo ""
    echo -e "${CYAN}To view a doc:${NC} xdg-open 'file:///path/from/documentation_links.md'"
}
#==============================================================================
# Main
#==============================================================================
# Pipeline driver: $1 is a repository URL or path, $2 an optional output
# directory. Runs the three stages (get repo, analyze, generate) and then
# prints the summary. With no arguments, -h or --help it shows the usage.
main() {
    if (( $# == 0 )) || [[ "$1" == "-h" || "$1" == "--help" ]]; then
        usage
    fi

    local source_arg="$1"
    local dest_dir="${2:-}" # Empty means "derive from the repo name"
    local checkout results

    print_header "Repo → Study Materials Pipeline"

    # Preparation
    mkdir -p "$WORK_DIR"
    check_dependencies
    ensure_offline_docs

    # Stage 1: obtain a local copy of the repository
    print_header "Step 1/3: Getting Repository"
    checkout=$(get_repo "$source_arg")
    # get_repo ran in a subshell, so its REPO_NAME assignment did not survive
    [ -n "$REPO_NAME" ] || REPO_NAME=$(basename "$checkout")

    # Choose the destination: default location, or the absolute form of $2
    case "$dest_dir" in
        "") dest_dir="$STUDY_MATERIALS_BASE/$REPO_NAME" ;;
        /*) ;;
        *) dest_dir="$(pwd)/$dest_dir" ;;
    esac

    {
        echo -e "${BOLD}Input:${NC} $source_arg"
        echo -e "${BOLD}Output:${NC} $dest_dir"
        echo ""
    } >&2

    # Stage 2: analyze the code
    print_header "Step 2/3: Analyzing Code"
    results=$(analyze_repo "$checkout")

    # Stage 3: build the study materials
    print_header "Step 3/3: Generating Study Materials"
    generate_materials "$results" "$dest_dir"

    # Report
    show_summary "$dest_dir"
}
main "$@"

View File

@ -0,0 +1,713 @@
#!/bin/bash
#==============================================================================
# Offline Documentation Setup
# Downloads and indexes official documentation for multiple programming languages
#
# Usage: ./setup_offline_docs.sh [--all | --python | --cpp | --js | --rust | --go | --shell | --zeal | --status | --help]
#
# Documentation is stored in: ~/.local/share/offline-docs/
#==============================================================================
# Stop at the first command that fails
set -e

# Storage locations; OFFLINE_DOCS_DIR overrides the default docs root
DOCS_DIR="${OFFLINE_DOCS_DIR:-$HOME/.local/share/offline-docs}"
INDEX_DIR="${DOCS_DIR}/.index"

# ANSI color escapes used by the print helpers
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
# Print the title $1 between two horizontal rules on stdout.
print_header() {
    local rule="${BLUE}════════════════════════════════════════════════════════════${NC}"
    printf '%b\n' "$rule" "${GREEN} $1${NC}" "$rule"
}
# Print the progress message $1 on stdout, prefixed by the yellow marker.
print_status() {
    printf '%b\n' "${YELLOW}${NC} $1"
}
# Print the success message $1 on stdout, prefixed by the green marker.
print_success() {
    printf '%b\n' "${GREEN}${NC} $1"
}
# Print the error message $1 on stdout, prefixed by the red marker.
print_error() {
    printf '%b\n' "${RED}${NC} $1"
}
# Create the per-language documentation directories under DOCS_DIR and the
# index directory INDEX_DIR. Existing directories are left as they are.
setup_dirs() {
    local lang
    for lang in python c_cpp javascript typescript rust go ruby java shell; do
        mkdir -p "$DOCS_DIR/$lang"
    done
    mkdir -p "$INDEX_DIR"
}
#==============================================================================
# Python Documentation
# Source: https://docs.python.org/3/download.html
#==============================================================================
# Download the Python 3.12 HTML documentation archive, unpack it into
# $DOCS_DIR/python and build the search index.
# Returns 0 when the download fails (a notice is printed and other
# languages can still be processed); returns 1 when a temporary file cannot
# be created or the downloaded archive cannot be extracted.
download_python_docs() {
    print_header "Python Documentation"
    local dest="$DOCS_DIR/python"
    # Download HTML documentation (most searchable)
    local url="https://www.python.org/ftp/python/doc/3.12.8/python-3.12.8-docs-html.tar.bz2"
    local archive

    # Check if already downloaded
    if [ -f "$dest/library/index.html" ]; then
        print_status "Python docs already present, checking for updates..."
    fi

    print_status "Downloading Python 3.12 documentation..."
    mkdir -p "$dest"
    # Unpredictable temp name instead of a fixed path in /tmp
    if ! archive=$(mktemp "${TMPDIR:-/tmp}/python-docs.XXXXXX"); then
        print_error "Could not create a temporary file for the download"
        return 1
    fi

    # -f makes curl fail on HTTP errors instead of saving the error page,
    # which would otherwise be handed to tar as if it were the archive.
    if ! curl -fL -o "$archive" "$url" 2>/dev/null; then
        rm -f "$archive"
        print_error "Failed to download Python docs"
        print_status "Alternative: Use 'python -m pydoc' for built-in docs"
        return 0
    fi

    print_status "Extracting..."
    if ! tar -xjf "$archive" -C "$dest" --strip-components=1; then
        rm -f "$archive"
        print_error "Failed to extract Python docs archive"
        return 1
    fi
    rm -f "$archive"
    print_success "Python documentation installed to $dest"

    # Build index
    build_python_index
}
# Write $INDEX_DIR/python_index.txt as a sorted, de-duplicated list of
# "term location" lines collected from three places in the Python docs:
# library page names, element ids in library/functions.html, and the links
# found in genindex.html (first 5000 only).
build_python_index() {
    print_status "Building Python documentation index..."
    local root="$DOCS_DIR/python"
    local out_file="$INDEX_DIR/python_index.txt"
    local lib="$root/library"
    local builtins_page="$lib/functions.html"
    local general_index="$root/genindex.html"
    local name anchor

    # Create searchable index: term -> file path
    {
        # One entry per library page, keyed by file name without ".html"
        while read -r name; do
            printf '%s %s\n' "$name" "$lib/$name.html"
        done < <(find "$lib" -name "*.html" -exec basename {} .html \; 2>/dev/null)

        # Built-in functions: every id="..." value on functions.html
        if [ -f "$builtins_page" ]; then
            while read -r anchor; do
                printf '%s %s\n' "$anchor" "$builtins_page#$anchor"
            done < <(grep -oP '(?<=id=")[^"]+' "$builtins_page" 2>/dev/null)
        fi

        # General index: link text followed by the href exactly as written
        if [ -f "$general_index" ]; then
            grep -oP 'href="([^"]+)"[^>]*>([^<]+)' "$general_index" 2>/dev/null \
                | sed -E 's/href="([^"]+)"[^>]*>([^<]+)/\2 \1/' \
                | head -5000
        fi
    } | sort -u > "$out_file"

    print_success "Python index created with $(wc -l < "$out_file") entries"
}
#==============================================================================
# C/C++ Documentation (cppreference)
# Uses cppman tool which caches pages from cppreference.com
# Fallback: AUR cppreference package or direct download
#==============================================================================
# Make C/C++ reference documentation available under $DOCS_DIR/c_cpp,
# trying in order: cppman, an installed system cppreference package, the
# AUR package (via yay), and finally a direct archive download.
# Returns 0 as soon as one method succeeds or when docs from an earlier run
# (including a cppman cache link) are already present; returns 1 when every
# method fails.
download_cpp_docs() {
    print_header "C/C++ Documentation (cppreference)"
    local dest="$DOCS_DIR/c_cpp"
    local archive url
    local urls=(
        "https://upload.cppreference.com/mwiki/images/1/16/html_book_20241110.tar.xz"
        "https://github.com/nicovank/cppreference-doc/releases/latest/download/html_book.tar.xz"
    )

    # The cppman_cache link counts as "present" too, so a cppman-based
    # install is not re-cached from scratch on every run.
    if [ -f "$dest/en/index.html" ] || [ -d "$dest/reference" ] \
        || [ -L "$dest/system" ] || [ -L "$dest/cppman_cache" ]; then
        print_status "C/C++ docs already present"
        return 0
    fi
    mkdir -p "$dest"

    # Method 1: Use cppman if available (best - fetches and caches on demand)
    if command -v cppman &>/dev/null; then
        print_status "Found cppman, caching common C++ references..."
        # "-c" caches all pages. A failure falls through to the other
        # methods instead of ending the script without any message.
        if cppman -s cppreference.com 2>/dev/null && cppman -c 2>/dev/null; then
            print_success "cppman configured - use 'cppman <term>' for lookups"
            print_status "Cppman cache at: ~/.cache/cppman/"
            # -n replaces an existing link instead of creating one inside
            # the directory it points to
            ln -sfn "$HOME/.cache/cppman" "$dest/cppman_cache" 2>/dev/null
            build_cpp_index
            return 0
        fi
        print_error "cppman caching failed, trying other sources"
    fi

    # Method 2: Check if system package already installed
    if [ -d /usr/share/doc/cppreference/en ]; then
        print_status "Found system cppreference package"
        ln -sfn /usr/share/doc/cppreference "$dest/system"
        print_success "C/C++ documentation linked from system package"
        build_cpp_index
        return 0
    fi

    # Method 3: Try AUR package (Arch Linux)
    if command -v yay &>/dev/null; then
        print_status "Installing cppreference from AUR..."
        if yay -S --noconfirm cppreference 2>/dev/null; then
            # Link to installed docs (the package uses /en not /html)
            if [ -d /usr/share/doc/cppreference/en ]; then
                ln -sfn /usr/share/doc/cppreference "$dest/system"
                print_success "C/C++ documentation linked from system package"
                build_cpp_index
                return 0
            fi
        fi
    fi

    # Method 4: Direct download (try multiple mirrors)
    print_status "Downloading cppreference offline archive..."
    if ! archive=$(mktemp "${TMPDIR:-/tmp}/cppreference.XXXXXX"); then
        print_error "Could not create a temporary file for the download"
        return 1
    fi
    for url in "${urls[@]}"; do
        print_status "Trying: $url"
        if curl -fL -o "$archive" "$url" 2>/dev/null; then
            print_status "Extracting (this may take a while)..."
            if tar -xJf "$archive" -C "$dest" 2>/dev/null; then
                rm -f "$archive"
                print_success "C/C++ documentation installed to $dest"
                build_cpp_index
                return 0
            fi
        fi
    done
    rm -f "$archive"

    print_error "Failed to download cppreference"
    print_status "Manual install: yay -S cppreference OR yay -S cppman"
    return 1
}
# Write $INDEX_DIR/cpp_index.txt with "term|filepath" lines (| is used as
# the separator so terms may contain spaces). Two kinds of entries:
#   - the file name without ".html" for every HTML file found, and
#   - the path relative to <search_dir>/en/ without ".html"
#     (e.g. cpp/container/vector).
# Pages named "index" are skipped. When $DOCS_DIR/c_cpp/system is a
# symlink, the search starts there.
build_cpp_index() {
    print_status "Building C/C++ documentation index..."
    local dest="$DOCS_DIR/c_cpp"
    local index="$INDEX_DIR/cpp_index.txt"
    local search_dir="$dest"
    local file term relpath

    if [ -L "$dest/system" ]; then
        search_dir="$dest/system"
    fi

    {
        # -H makes find follow the symlink given as the starting point; with
        # the default behaviour a symlinked start directory is not descended
        # into and no file-name entries would be produced for it.
        find -H "$search_dir" -name "*.html" -type f 2>/dev/null | while IFS= read -r file; do
            # Extract meaningful term from path (e.g., /en/cpp/container/vector.html -> vector)
            term=$(basename "$file" .html)
            # Skip index files and overly generic names
            [[ "$term" == "index" ]] && continue
            echo "${term}|${file}"
        done

        # Also index by path components for better discoverability
        find -H "$search_dir/en" -name "*.html" -type f 2>/dev/null | while IFS= read -r file; do
            # Strip the literal prefix and suffix with parameter expansion so
            # characters in the directory name are never treated as a regex
            relpath=${file#"$search_dir/en/"}
            relpath=${relpath%.html}
            [[ "${relpath##*/}" == "index" ]] && continue
            # The full relative path is the searchable term (cpp/vector, c/stdlib/malloc)
            echo "${relpath}|${file}"
        done
    } | sort -u > "$index"

    print_success "C/C++ index created with $(wc -l < "$index") entries"
}
#==============================================================================
# JavaScript/MDN Documentation
# Clone the actual MDN content repository for full documentation
# https://github.com/mdn/content
#==============================================================================
# Fetch the MDN content repository into $DOCS_DIR/mdn-content using a
# sparse checkout limited to the JavaScript, Web API, HTML, CSS and glossary
# trees, link those trees under $DOCS_DIR/javascript, and build the index.
# When the JavaScript tree is already present only the index is rebuilt.
# The caller's working directory is not changed.
download_js_docs() {
    print_header "JavaScript/MDN Documentation"
    local dest="$DOCS_DIR/javascript"
    local mdn_repo="$DOCS_DIR/mdn-content"
    local content="$mdn_repo/files/en-us"
    local doc_count

    # Check if already cloned
    if [ -d "$content/web/javascript" ]; then
        print_status "MDN content already present"
        build_js_index
        return 0
    fi

    print_status "Cloning MDN content repository (sparse checkout for web docs)..."
    print_status "This may take a few minutes on first run..."
    mkdir -p "$mdn_repo"

    # Initialize sparse checkout to only get what we need
    if [ ! -d "$mdn_repo/.git" ]; then
        git -C "$mdn_repo" init
        git -C "$mdn_repo" remote add origin https://github.com/mdn/content.git
        git -C "$mdn_repo" config core.sparseCheckout true
        # Only checkout web-related documentation (JS, HTML, CSS, Web APIs)
        mkdir -p "$mdn_repo/.git/info"
        cat > "$mdn_repo/.git/info/sparse-checkout" << 'SPARSE'
/files/en-us/web/javascript/
/files/en-us/web/api/
/files/en-us/web/html/
/files/en-us/web/css/
/files/en-us/glossary/
SPARSE
        print_status "Fetching MDN content (JavaScript, HTML, CSS, Web APIs)..."
        git -C "$mdn_repo" fetch --depth 1 origin main
        git -C "$mdn_repo" checkout main
    else
        print_status "Updating MDN content..."
        # Best effort: a failed update keeps whatever is already checked out
        git -C "$mdn_repo" pull --depth 1 origin main 2>/dev/null || true
    fi

    # Create symlinks for easier access. -n replaces an existing link; plain
    # "ln -sf" onto a link that points at a directory would instead create a
    # new link inside that directory.
    mkdir -p "$dest"
    ln -sfn "$content/web/javascript" "$dest/javascript"
    ln -sfn "$content/web/api" "$dest/web-api"
    ln -sfn "$content/web/html" "$dest/html"
    ln -sfn "$content/web/css" "$dest/css"
    ln -sfn "$content/glossary" "$dest/glossary"

    build_js_index
    print_success "MDN offline documentation ready"
    doc_count=$(find "$mdn_repo/files" -name "index.md" 2>/dev/null | wc -l)
    print_status "Downloaded $doc_count documentation pages"
}
# Print one "term|file|title" line for every index.md located by find.
#   $1    "html" to use the fixed title "HTML <term> element"; any other
#         value reads the first "title:" line of the file, falling back to
#         the term when there is none
#   $2... arguments for find (start directory, optional depth limit)
# The term is the name of the directory that contains the index.md.
_mdn_index_lines() {
    local title_mode="$1"
    shift
    local page slug heading
    find "$@" -name "index.md" 2>/dev/null | while read -r page; do
        slug=$(basename "$(dirname "$page")")
        if [ "$title_mode" = "html" ]; then
            echo "${slug}|${page}|HTML <${slug}> element"
        else
            # Extract title from frontmatter if available
            heading=$(grep -m1 "^title:" "$page" 2>/dev/null | sed 's/^title:\s*//' | tr -d '"')
            echo "${slug}|${page}|${heading:-$slug}"
        fi
    done
}

# Write $INDEX_DIR/js_index.txt from the MDN markdown tree, keeping a single
# line per term. Covers the JavaScript reference, Web APIs, HTML elements,
# CSS (top two levels only) and the glossary.
# Returns 1 when the MDN content directory is missing.
build_js_index() {
    print_status "Building MDN documentation index..."
    local mdn_repo="$DOCS_DIR/mdn-content"
    local index="$INDEX_DIR/js_index.txt"
    local web="$mdn_repo/files/en-us/web"
    local count

    if [ ! -d "$mdn_repo/files" ]; then
        print_error "MDN content not found"
        return 1
    fi

    # Build comprehensive index from MDN markdown files
    {
        _mdn_index_lines frontmatter "$web/javascript/reference"
        _mdn_index_lines frontmatter "$web/api"
        _mdn_index_lines html "$web/html/element"
        _mdn_index_lines frontmatter "$web/css" -maxdepth 2
        _mdn_index_lines frontmatter "$mdn_repo/files/en-us/glossary"
    } | sort -t'|' -k1,1 -u > "$index"

    count=$(wc -l < "$index")
    print_success "MDN index created with $count entries"
}
#==============================================================================
# Rust Documentation (via rustup)
#==============================================================================
# Link the locally installed Rust documentation into $DOCS_DIR/rust/std and
# build the index. The directory linked is the one that holds the page
# reported by "rustup doc --path". Prints an install hint when rustup is
# not available and a notice when no documentation directory is found.
download_rust_docs() {
    print_header "Rust Documentation"
    local dest="$DOCS_DIR/rust"
    local index_page rust_doc_path=""

    if ! command -v rustup &>/dev/null; then
        print_status "Rust not installed. Install with: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh"
        return 0
    fi

    print_status "Rust docs available via 'rustup doc'"
    # Get the rust doc path. dirname is applied to the captured line as one
    # argument so paths containing spaces survive, and an empty answer from
    # rustup does not end the script under set -e.
    index_page=$(rustup doc --path 2>/dev/null | head -1) || true
    if [ -n "$index_page" ]; then
        rust_doc_path=$(dirname "$index_page")
    fi

    if [ -n "$rust_doc_path" ] && [ -d "$rust_doc_path" ]; then
        mkdir -p "$dest"
        # -n replaces an existing link rather than nesting a new link inside
        # the directory the old one points to
        ln -sfn "$rust_doc_path" "$dest/std"
        print_success "Linked Rust std docs from $rust_doc_path"
        build_rust_index
    else
        print_status "Rust documentation directory not found"
    fi
}
# Write $INDEX_DIR/rust_index.txt: the sorted, unique page names (file name
# without ".html") of at most 2000 HTML files under the std/ directory of
# the rustup documentation. Nothing is written when rustup or that
# directory is unavailable; the success message is printed either way.
build_rust_index() {
    print_status "Building Rust documentation index..."
    local out_file="$INDEX_DIR/rust_index.txt"
    local html_root page

    if command -v rustup &>/dev/null; then
        # Directory that contains the page reported by rustup
        html_root=$(rustup doc --path 2>/dev/null | head -1 | xargs dirname 2>/dev/null)
        if [ -d "$html_root/std" ]; then
            while read -r page; do
                basename "$page" .html
            done < <(find "$html_root/std" -name "*.html" 2>/dev/null | head -2000) \
                | sort -u > "$out_file"
        fi
    fi

    print_success "Rust index created"
}
#==============================================================================
# Go Documentation
#==============================================================================
# Save the output of "go list std" to $DOCS_DIR/go/stdlib_packages.txt and
# build the Go index. Only prints a notice when the go tool is unavailable.
download_go_docs() {
    print_header "Go Documentation"
    local target="$DOCS_DIR/go"

    if ! command -v go &>/dev/null; then
        print_status "Go not installed"
        return
    fi

    print_status "Go docs available via 'go doc'"
    # Create a reference of standard library packages
    mkdir -p "$target"
    go list std 2>/dev/null > "$target/stdlib_packages.txt"
    print_success "Go stdlib package list created"
    build_go_index
}
# Copy the saved standard-library package list to $INDEX_DIR/go_index.txt
# when that list exists. The success message is printed either way.
build_go_index() {
    print_status "Building Go documentation index..."
    local pkg_list="$DOCS_DIR/go/stdlib_packages.txt"
    local out_file="$INDEX_DIR/go_index.txt"

    [ ! -f "$pkg_list" ] || cp "$pkg_list" "$out_file"

    print_success "Go index created"
}
#==============================================================================
# Shell/Bash Documentation (man pages + built-in help)
#==============================================================================
# Generate shell documentation under $DOCS_DIR/shell, then build its index:
#   bash_builtins.txt    "help" output for every builtin listed by compgen -b
#   common_commands.txt  a fixed quick-reference list of common commands
download_shell_docs() {
    print_header "Shell/Bash Documentation"
    local dest="$DOCS_DIR/shell"
    local name
    mkdir -p "$dest"

    print_status "Extracting bash built-in help..."
    # One "=== name ===" section per builtin, each followed by a blank line
    {
        printf '%s\n' "# Bash Built-in Commands Reference" "# Generated from 'help' command" ""
        while read -r name; do
            printf '=== %s ===\n' "$name"
            help "$name" 2>/dev/null || echo "No help available"
            echo ""
        done < <(compgen -b 2>/dev/null)
    } > "$dest/bash_builtins.txt"

    # Create quick reference for common commands
    cat > "$dest/common_commands.txt" << 'SHELLREF'
# Common Shell Commands Quick Reference
## File Operations
ls - List directory contents
cd - Change directory
pwd - Print working directory
cp - Copy files
mv - Move/rename files
rm - Remove files
mkdir - Create directory
rmdir - Remove empty directory
touch - Create empty file / update timestamp
cat - Concatenate and display files
head - Display first lines
tail - Display last lines
less - Page through file
find - Search for files
locate - Find files by name (uses database)
## Text Processing
grep - Search text patterns
sed - Stream editor
awk - Pattern scanning and processing
cut - Remove sections from lines
sort - Sort lines
uniq - Report or omit repeated lines
wc - Word, line, character count
tr - Translate characters
diff - Compare files
## Process Management
ps - Report process status
top - Display processes
kill - Send signal to process
pkill - Kill processes by name
bg - Background a process
fg - Foreground a process
jobs - List background jobs
nohup - Run immune to hangups
## Networking
curl - Transfer data from URL
wget - Download files
ssh - Secure shell
scp - Secure copy
rsync - Remote sync
ping - Test connectivity
netstat - Network statistics
ss - Socket statistics
## Archives
tar - Tape archive
gzip - Compress files
gunzip - Decompress files
zip - Package and compress
unzip - Extract zip archives
## Permissions
chmod - Change file permissions
chown - Change file owner
chgrp - Change file group
## Disk
df - Disk free space
du - Disk usage
mount - Mount filesystem
umount - Unmount filesystem
## System
uname - System information
hostname - Show/set hostname
uptime - System uptime
free - Memory usage
date - Display/set date
cal - Display calendar
## Bash Builtins
echo - Display text
printf - Formatted output
read - Read input
export - Set environment variable
source - Execute script in current shell
alias - Create command alias
type - Display command type
which - Locate command
declare - Declare variables
local - Local variable
set - Set shell options
shopt - Shell options
trap - Trap signals
eval - Evaluate arguments
exec - Execute command
SHELLREF

    print_success "Shell documentation created"
    build_shell_index
}
# Write $INDEX_DIR/shell_index.txt with sorted, unique "command location"
# lines: every bash builtin points at bash_builtins.txt, and each common
# command that has a manual page points at the path printed by "man -w".
# Commands without a manual page are skipped.
build_shell_index() {
    print_status "Building Shell documentation index..."
    local dest="$DOCS_DIR/shell"
    local index="$INDEX_DIR/shell_index.txt"
    local cmd man_path

    {
        # Bash builtins
        compgen -b 2>/dev/null | while read -r cmd; do
            echo "$cmd $dest/bash_builtins.txt"
        done

        # Common commands from man pages
        for cmd in ls cd cp mv rm mkdir cat grep sed awk find sort curl wget tar chmod; do
            # "|| continue" matters under set -e: a bare failing assignment
            # would end this subshell at the first command without a manual
            # page and silently drop every command after it.
            man_path=$(man -w "$cmd" 2>/dev/null) || continue
            if [ -n "$man_path" ]; then
                echo "$cmd $man_path"
            fi
        done
    } | sort -u > "$index"

    print_success "Shell index created"
}
#==============================================================================
# Zeal Docsets (cross-platform dash alternative)
#==============================================================================
# Print guidance about Zeal: install instructions when the zeal command is
# not available, otherwise a pointer to its in-app docset download.
# Always returns 0 on the not-installed path.
setup_zeal_docsets() {
    print_header "Zeal Docsets (Optional)"

    if command -v zeal &>/dev/null; then
        print_status "Zeal is installed. You can download docsets from within Zeal."
        print_status "Recommended docsets: Python 3, JavaScript, TypeScript, C, C++"
    else
        print_status "Zeal not installed."
        print_status "Install with: pacman -S zeal (or your package manager)"
        print_status "Zeal provides a GUI for offline documentation"
        return 0
    fi
}
#==============================================================================
# Main
#==============================================================================
usage() {
cat << EOF
Usage: $0 [OPTIONS]
Download and setup offline documentation for programming languages.
Options:
--all Download all available documentation
--python Download Python documentation
--cpp, --c Download C/C++ documentation (cppreference)
--js Download JavaScript documentation
--rust Download/link Rust documentation
--go Download/link Go documentation
--shell Generate Shell/Bash documentation
--zeal Setup Zeal docsets info
--status Show what's installed
--help Show this help
Documentation is stored in: $DOCS_DIR
Examples:
$0 --all # Download everything
$0 --python --cpp # Download Python and C++ docs
$0 --status # Check what's installed
EOF
}
# Report, for each supported language, whether its directory under DOCS_DIR
# exists and is non-empty (with its size as reported by du), then list the
# index files in INDEX_DIR.
show_status() {
    # Loop variables are local so they do not leak into the rest of the script
    local lang dir size

    print_header "Offline Documentation Status"
    echo "Documentation directory: $DOCS_DIR"
    echo ""

    for lang in python c_cpp javascript rust go shell; do
        dir="$DOCS_DIR/$lang"
        # "ls -A" prints nothing for an empty directory
        if [ -d "$dir" ] && [ "$(ls -A "$dir" 2>/dev/null)" ]; then
            size=$(du -sh "$dir" 2>/dev/null | cut -f1)
            print_success "$lang: installed ($size)"
        else
            print_error "$lang: not installed"
        fi
    done

    echo ""
    echo "Index files:"
    ls -la "$INDEX_DIR"/*.txt 2>/dev/null || echo "No indexes built yet"
}
# Command-line driver. Creates the directory layout, then handles the
# options left to right, running the matching action for each one.
# With no options it prints the usage and exits 0; --help/-h does the same
# at the point where it appears; an unknown option prints an error plus the
# usage and exits 1. A closing summary is printed after all options ran.
main() {
    setup_dirs

    if (( $# == 0 )); then
        usage
        exit 0
    fi

    local opt
    for opt in "$@"; do
        case "$opt" in
            --all)
                download_python_docs
                download_cpp_docs
                download_js_docs
                download_rust_docs
                download_go_docs
                download_shell_docs
                setup_zeal_docsets
                ;;
            --python) download_python_docs ;;
            --cpp|--c|--c++) download_cpp_docs ;;
            --js|--javascript) download_js_docs ;;
            --rust) download_rust_docs ;;
            --go) download_go_docs ;;
            --shell|--bash) download_shell_docs ;;
            --zeal) setup_zeal_docsets ;;
            --status) show_status ;;
            --help|-h)
                usage
                exit 0
                ;;
            *)
                print_error "Unknown option: $opt"
                usage
                exit 1
                ;;
        esac
    done

    echo ""
    print_header "Setup Complete"
    echo "Documentation stored in: $DOCS_DIR"
    echo ""
    echo "Use 'lookup_docs.sh <term> [language]' to search documentation"
}
main "$@"