#!/bin/bash
# KB Index Generation Script
# Generates kb/_index.md with searchable metadata from all KB files
#
# Usage: run with no arguments. The KB root is the parent directory of the
# directory containing this script; the index is written to <KB root>/_index.md.
#
# Only files that live under one of CATEGORIES, are outside the special
# underscore subdirectories, and whose name matches KB_FILENAME_RE are indexed.
# Metadata comes from the YAML front matter at the top of each file.

set -euo pipefail

# Get the script directory and KB root directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
KB_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
INDEX_FILE="$KB_ROOT/_index.md"

# Categories to scan
CATEGORIES=("01_projects" "02_systems" "03_research" "04_design" "05_decisions" "06_glossary" "07_playbooks" "08_archive")

# Filename convention: YYYY-MM-DD--slug--type[--pN].md
KB_FILENAME_RE='^[0-9]{4}-[0-9]{2}-[0-9]{2}--[a-z0-9-]+--(idea|note|spec|decision|howto|retro|meeting)(--p[0-9]+)?\.md$'

# Field separator for the intermediate record files. The ASCII unit separator
# is used instead of '|' so that titles or topics containing '|' cannot shift
# fields. It is deliberately not a whitespace character: `read` and awk keep
# empty fields for non-whitespace separators.
FIELD_SEP=$'\x1f'

# Working files (created in main) and the counters reported at the end.
TMP_DIR=""
TOPICS_FILE=""
TAGS_FILE=""
PHASES_FILE=""
FILES_FILE=""
FILE_COUNT=0
TOPIC_COUNT=0
TAG_COUNT=0
PHASE_COUNT=0

# Remove the temporary working directory (installed as the EXIT trap).
cleanup() {
  if [[ -n "$TMP_DIR" ]]; then
    rm -rf -- "$TMP_DIR"
  fi
}

#######################################
# Print the YAML front matter of a file, without the '---' delimiters.
# The opening '---' must be the first non-blank line of the file; a '---'
# that appears later (e.g. a Markdown horizontal rule) does not start front
# matter. Trailing carriage returns are stripped so CRLF files work.
# Arguments: $1 - path to the file
# Outputs:   front matter lines on stdout (nothing if there is none)
# Returns:   1 if the file does not exist, 0 otherwise
#######################################
extract_frontmatter() {
  local file=$1
  if [[ ! -f "$file" ]]; then
    return 1
  fi
  awk '
    { sub(/\r$/, "") }
    !opened {
      if ($0 ~ /^[ \t]*$/) next
      if ($0 == "---") { opened = 1; next }
      exit
    }
    $0 == "---" { exit }
    { print }
  ' "$file" 2>/dev/null || echo ""
}

#######################################
# Print the value of a simple top-level "field: value" entry.
# Only the first occurrence is used. Surrounding whitespace is trimmed and one
# pair of matching single or double quotes around the value is removed.
# Arguments: $1 - front matter text, $2 - field name (matched literally)
# Outputs:   the value on stdout (empty if the field is absent)
#######################################
extract_yaml_simple() {
  local frontmatter=$1
  local field=$2
  # A here-string is used rather than a pipe so that awk exiting early cannot
  # raise SIGPIPE in a writer (which would be fatal under pipefail).
  awk -v key="${field}:" -v sq="'" '
    index($0, key) == 1 {
      value = substr($0, length(key) + 1)
      sub(/\r$/, "", value)
      gsub(/^[ \t]+|[ \t]+$/, "", value)
      n = length(value)
      first = substr(value, 1, 1)
      if (n >= 2 && first == substr(value, n, 1) && (first == "\"" || first == sq)) {
        value = substr(value, 2, n - 2)
      }
      print value
      exit
    }
  ' <<< "$frontmatter"
}

#######################################
# Print the items of a top-level YAML list, one per line.
# Supported forms:
#   field: [a, "b c", 'd']      flow sequence (may span several lines)
#   field:                      block sequence, items indented or not
#     - a
#     - "b c"
# Items are trimmed, one pair of matching quotes is removed, and empty items
# are dropped. A comma inside a quoted flow item does not split the item.
# This is a small subset of YAML, not a full parser.
# Arguments: $1 - front matter text, $2 - field name (matched literally)
# Outputs:   one item per line on stdout
#######################################
extract_yaml_array() {
  local frontmatter=$1
  local field=$2
  awk -v key="${field}:" -v sq="'" '
    function trim(s) {
      gsub(/^[ \t]+|[ \t]+$/, "", s)
      return s
    }
    # Print one item after trimming and unquoting; skip empty items.
    function emit(v,    n, first) {
      v = trim(v)
      n = length(v)
      first = substr(v, 1, 1)
      if (n >= 2 && first == substr(v, n, 1) && (first == "\"" || first == sq)) {
        v = trim(substr(v, 2, n - 2))
      }
      if (v != "") print v
    }
    # Split the text between the first "[" and the last "]" on commas.
    # A quote only opens a quoted item when it is the first non-blank
    # character of that item, so an apostrophe inside a word is literal.
    function emit_flow(s,    open_at, close_at, i, n, c, quote, item) {
      open_at = index(s, "[")
      close_at = 0
      for (i = length(s); i > open_at; i--) {
        if (substr(s, i, 1) == "]") { close_at = i; break }
      }
      if (open_at == 0 || close_at == 0) return
      s = substr(s, open_at + 1, close_at - open_at - 1)
      n = length(s)
      quote = ""
      item = ""
      for (i = 1; i <= n; i++) {
        c = substr(s, i, 1)
        if (quote != "") {
          item = item c
          if (c == quote) quote = ""
        } else if ((c == "\"" || c == sq) && item ~ /^[ \t]*$/) {
          quote = c
          item = item c
        } else if (c == ",") {
          emit(item)
          item = ""
        } else {
          item = item c
        }
      }
      emit(item)
    }

    { sub(/\r$/, "") }

    !found {
      if (index($0, key) != 1) next
      found = 1
      rest = trim(substr($0, length(key) + 1))
      if (substr(rest, 1, 1) != "[") { mode = "block"; next }
      mode = "flow"
      buf = rest
      if (index(buf, "]")) { emit_flow(buf); exit }
      next
    }

    mode == "flow" {
      buf = buf " " $0
      if (index($0, "]")) { emit_flow(buf); exit }
      next
    }

    # Block sequence: blank lines and comments are skipped, "- item" lines
    # are emitted at any indentation, anything else ends the list.
    /^[ \t]*$/ || /^[ \t]*#/ { next }
    /^[ \t]*-([ \t]|$)/ {
      item = $0
      sub(/^[ \t]*-[ \t]*/, "", item)
      emit(item)
      next
    }
    { exit }
  ' <<< "$frontmatter"
}

#######################################
# Append "value<SEP>path" records for every item of a list field.
# Arguments: $1 - front matter, $2 - field name,
#            $3 - path relative to the KB root, $4 - record file to append to
#######################################
index_list_field() {
  local frontmatter=$1
  local field=$2
  local relative_path=$3
  local out_file=$4
  local value
  while IFS= read -r value; do
    if [[ -n "$value" ]]; then
      printf '%s%s%s\n' "$value" "$FIELD_SEP" "$relative_path" >> "$out_file"
    fi
  done < <(extract_yaml_array "$frontmatter" "$field")
}

#######################################
# Record the metadata of one KB file in the working files.
# Files outside the known categories are skipped silently; files without
# front matter are skipped with a warning on stderr.
# Globals:   KB_ROOT, CATEGORIES, FIELD_SEP (read);
#            FILES_FILE, TOPICS_FILE, TAGS_FILE, PHASES_FILE (appended to)
# Arguments: $1 - absolute path of the file
#######################################
process_kb_file() {
  local file=$1
  # KB_ROOT is quoted inside the expansion so it is stripped as a literal
  # prefix, not interpreted as a glob pattern.
  local relative_path=${file#"$KB_ROOT"/}
  local category=""
  local candidate

  # Determine category from path
  for candidate in "${CATEGORIES[@]}"; do
    if [[ "$relative_path" == "$candidate"/* ]]; then
      category=$candidate
      break
    fi
  done
  if [[ -z "$category" ]]; then
    return 0 # Skip files not in known categories
  fi

  # Extract frontmatter
  local frontmatter
  frontmatter=$(extract_frontmatter "$file") || frontmatter=""
  if [[ -z "$frontmatter" ]]; then
    echo "Warning: No frontmatter found in $relative_path" >&2
    return 0
  fi

  # Extract metadata
  local title date_value type_value summary
  title=$(extract_yaml_simple "$frontmatter" "title")
  date_value=$(extract_yaml_simple "$frontmatter" "date")
  type_value=$(extract_yaml_simple "$frontmatter" "type")
  summary=$(extract_yaml_simple "$frontmatter" "summary")

  # Store file info (summary is recorded but not currently rendered)
  printf '%s\n' \
    "${category}${FIELD_SEP}${relative_path}${FIELD_SEP}${title}${FIELD_SEP}${date_value}${FIELD_SEP}${type_value}${FIELD_SEP}${summary}" \
    >> "$FILES_FILE"

  # Extract and index topics, tags and phase relevance
  index_list_field "$frontmatter" "topics" "$relative_path" "$TOPICS_FILE"
  index_list_field "$frontmatter" "tags" "$relative_path" "$TAGS_FILE"
  index_list_field "$frontmatter" "phase_relevance" "$relative_path" "$PHASES_FILE"
}

#######################################
# Walk every category directory and process each matching KB file.
# Paths are handled NUL-delimited and sorted byte-wise so the resulting index
# does not depend on filesystem enumeration order.
#######################################
scan_kb_files() {
  local category category_dir file filename
  for category in "${CATEGORIES[@]}"; do
    category_dir="$KB_ROOT/$category"
    if [[ ! -d "$category_dir" ]]; then
      continue
    fi

    while IFS= read -r -d '' file; do
      # Skip if in a special subdirectory
      case "$file" in
        */_guides/* | */_templates/* | */_inbox/* | */_review_queue/*)
          continue
          ;;
      esac

      # Check if filename matches KB pattern
      filename=${file##*/}
      if [[ "$filename" =~ $KB_FILENAME_RE ]]; then
        process_kb_file "$file"
      fi
    done < <(find "$category_dir" -type f -name '*.md' -print0 | LC_ALL=C sort -z)
  done
}

# Print the number of distinct keys (first field) in a record file.
count_unique_keys() {
  awk -F"$FIELD_SEP" '!seen[$1]++ { n++ } END { print n + 0 }' "$1"
}

# Print the "File Listing by Category" entries, one subsection per category
# that has at least one file.
render_file_listing() {
  local category
  for category in "${CATEGORIES[@]}"; do
    awk -F"$FIELD_SEP" -v want="$category" '
      ($1 "") == (want "") {
        if (!n++) print "### " want "\n"
        print "- [`" $2 "`](" $2 ") - " $3 " (" $4 ", " $5 ")"
      }
      END { if (n) print "" }
    ' "$FILES_FILE"
  done
}

#######################################
# Print one "## <heading>" section grouping file links under each key.
# Keys are compared as exact strings (never as regexes or numbers), so a key
# such as "a.b" only lists its own files.
# Arguments: $1 - section heading, $2 - record file ("key<SEP>path" lines)
# Outputs:   the section on stdout; nothing if the record file is empty
#######################################
render_grouped_index() {
  local heading=$1
  local data_file=$2
  if [[ ! -s "$data_file" ]]; then
    return 0
  fi
  printf '## %s\n\n' "$heading"
  # Byte-wise sorting keeps all records of one key adjacent and makes the
  # output identical across locales.
  LC_ALL=C sort -u "$data_file" | awk -F"$FIELD_SEP" '
    ($1 "") != (current "") {
      if (NR > 1) print ""
      current = $1
      print "### " $1
    }
    { print "- [`" $2 "`](" $2 ")" }
    END { if (NR > 0) print "" }
  '
}

#######################################
# Compute the counters and write the index.
# The index is assembled in the temporary directory and copied into place
# only after it was generated completely, so a failure cannot leave a
# truncated index behind.
# Globals: FILE_COUNT, TOPIC_COUNT, TAG_COUNT, PHASE_COUNT (written)
#######################################
write_index() {
  local draft="$TMP_DIR/index.md"

  # awk prints a bare number; `wc -l` pads its output on BSD/macOS.
  FILE_COUNT=$(awk 'END { print NR }' "$FILES_FILE")
  TOPIC_COUNT=$(count_unique_keys "$TOPICS_FILE")
  TAG_COUNT=$(count_unique_keys "$TAGS_FILE")
  PHASE_COUNT=$(count_unique_keys "$PHASES_FILE")

  {
    cat << EOF
# KB Index

_Last updated: $(date +%Y-%m-%d)_

This index is automatically generated from KB file metadata. It provides searchable access to all KB content organized by category, topic, tag, and phase relevance.

---

## File Listing by Category

EOF
    render_file_listing
    render_grouped_index "Topics Index" "$TOPICS_FILE"
    render_grouped_index "Tags Index" "$TAGS_FILE"
    render_grouped_index "Phase Relevance Index" "$PHASES_FILE"

    # Summary
    printf '%s\n' "---"
    echo ""
    echo "## Summary"
    echo ""
    echo "- **Total KB Files**: $FILE_COUNT"
    echo "- **Unique Topics**: $TOPIC_COUNT"
    echo "- **Unique Tags**: $TAG_COUNT"
    echo "- **Phases Referenced**: $PHASE_COUNT"
    echo ""
    echo "_Index generated on $(date '+%Y-%m-%d %H:%M:%S')_"
  } > "$draft"

  cp -- "$draft" "$INDEX_FILE"
}

# Reset the working files, scan KB_ROOT and write INDEX_FILE.
build_index() {
  : > "$TOPICS_FILE"
  : > "$TAGS_FILE"
  : > "$PHASES_FILE"
  : > "$FILES_FILE"

  # Scan all KB files
  echo "Scanning KB files..."
  scan_kb_files

  # Generate index file
  echo "Generating index file..."
  write_index
}

main() {
  echo "Generating KB index..."

  # Create temporary files for indexing (second form is the BSD/macOS fallback)
  TMP_DIR=$(mktemp -d 2>/dev/null || mktemp -d -t 'kb-index')
  trap cleanup EXIT
  TOPICS_FILE="$TMP_DIR/topics.txt"
  TAGS_FILE="$TMP_DIR/tags.txt"
  PHASES_FILE="$TMP_DIR/phases.txt"
  FILES_FILE="$TMP_DIR/files.txt"

  build_index

  echo "Index generated successfully: $INDEX_FILE"
  echo " - Files indexed: $FILE_COUNT"
  echo " - Topics: $TOPIC_COUNT"
  echo " - Tags: $TAG_COUNT"
  echo " - Phases: $PHASE_COUNT"
}

main "$@"