rc-migration-tests/ef-migration/infrastructure/sql-server/extract_indexes.sh

#!/bin/bash

# Validate the command line: exactly one argument is expected, the SQL script
# to scan. Diagnostics go to stderr so they never mix with normal output.
if [[ $# -ne 1 ]]; then
    echo "Usage: $0 <sql_file>" >&2
    exit 1
fi

SQL_FILE="$1"
BASE_DIR="sql_objects"

# Fail early when the input cannot be read; otherwise the later steps would
# run on nothing and still report success.
if [[ ! -r "$SQL_FILE" || -d "$SQL_FILE" ]]; then
    echo "Error: cannot read SQL file: $SQL_FILE" >&2
    exit 1
fi

# Create one output directory per index category used by get_index_type.
for index_dir in clustered nonclustered unique_clustered; do
    if ! mkdir -p "$BASE_DIR/indexes/$index_dir"; then
        echo "Error: cannot create directory: $BASE_DIR/indexes/$index_dir" >&2
        exit 1
    fi
done

# Ensure proper permissions (applied recursively to everything under BASE_DIR)
chmod -R 755 "$BASE_DIR"

# Turn an object name into a string that is safe to use as a file name.
# Arguments: $1 - raw name, e.g. "[dbo].[My Table]_IX_Foo"
# Outputs:   the cleaned name on stdout
# Rules: square brackets and parentheses are dropped, dots and spaces become
# underscores, runs of underscores collapse to one, and a trailing underscore
# is removed.
clean_filename() {
    # tr handles the single-character work; this avoids the GNU-only "\|"
    # alternation in basic regular expressions, which BSD sed does not support.
    printf '%s\n' "$1" \
        | tr -d '[]()' \
        | tr '. ' '__' \
        | sed -e 's/__*/_/g' -e 's/_$//'
}

# Test whether a line contains the opening of a CREATE INDEX statement.
# Arguments: $1 - one line of SQL text
# Returns:   0 when the line matches, non-zero otherwise
is_index_start() {
    local candidate=$1
    # CREATE, optional UNIQUE, optional CLUSTERED/NONCLUSTERED, INDEX,
    # an index name (optionally in brackets), then ON. Case is ignored.
    local pattern='CREATE[[:space:]]*(UNIQUE[[:space:]]+)?(CLUSTERED|NONCLUSTERED)?[[:space:]]*INDEX[[:space:]]+\[?[^]]+\]?[[:space:]]+ON'

    grep -qiE "$pattern" <<<"$candidate"
}

# Extract the table name from a CREATE INDEX statement.
# Arguments: $1 - the statement text
# Outputs:   the table name with square brackets removed (e.g. "dbo.Foo"),
#            or nothing when the statement does not match
#
# The name may have any number of dot-separated parts, and each part may be
# bracketed ("[dbo].[Foo]") or bare ("dbo.Foo"). Keywords are matched
# case-insensitively, in line with is_index_start.
#
# The body is a subshell "( ... )" so that enabling nocasematch here never
# changes the caller's shell options.
extract_table_name() (
    shopt -s nocasematch

    # One name part: "[anything]" or a bare run without brackets, dots,
    # opening parentheses or whitespace.
    local part='(\[[^]]+\]|[^].[([:space:]]+)'
    local re='CREATE[[:space:]]*(UNIQUE[[:space:]]+)?(CLUSTERED|NONCLUSTERED)?[[:space:]]*INDEX[[:space:]]+(\[[^]]+\]|[^[:space:]]+)[[:space:]]+ON[[:space:]]+'
    # Capture group 4 wraps the whole, possibly multi-part, table name.
    re+="(${part}(\\.${part})*)"

    if [[ $1 =~ $re ]]; then
        printf '%s\n' "${BASH_REMATCH[4]}" | tr -d '[]'
    fi
)

# Extract the index name from a CREATE INDEX statement.
# Arguments: $1 - the statement text
# Outputs:   the index name with square brackets removed (e.g. "IX_Foo"),
#            or nothing when the statement does not match
#
# A bracketed name ends at its closing bracket; a bare name ends at the first
# whitespace, so it can no longer swallow the following "ON <table>" text.
# Keywords are matched case-insensitively, in line with is_index_start.
#
# The body is a subshell "( ... )" so that enabling nocasematch here never
# changes the caller's shell options.
extract_index_name() (
    shopt -s nocasematch

    # Capture group 3 is the index name, bracketed or bare.
    local re='CREATE[[:space:]]*(UNIQUE[[:space:]]+)?(CLUSTERED|NONCLUSTERED)?[[:space:]]*INDEX[[:space:]]+(\[[^]]+\]|[^[:space:]]+)'

    if [[ $1 =~ $re ]]; then
        printf '%s\n' "${BASH_REMATCH[3]}" | tr -d '[]'
    fi
)

# Classify a CREATE INDEX statement into one of the output categories.
# Arguments: $1 - the statement text
# Outputs:   "unique_clustered", "clustered" or "nonclustered" on stdout
#
# UNIQUE, when present, sits between CREATE and CLUSTERED/NONCLUSTERED, so the
# patterns must allow for it. A statement that names neither CLUSTERED nor
# NONCLUSTERED is reported as nonclustered, which is the SQL Server default.
# Unique nonclustered indexes are filed under "nonclustered" because no
# separate category exists for them.
get_index_type() {
    local line=$1

    if grep -qiE 'CREATE[[:space:]]+UNIQUE[[:space:]]+CLUSTERED' <<<"$line"; then
        echo "unique_clustered"
    elif grep -qiE 'CREATE[[:space:]]+CLUSTERED' <<<"$line"; then
        echo "clustered"
    else
        # Explicit NONCLUSTERED (with or without UNIQUE) or no type keyword.
        echo "nonclustered"
    fi
}

# Write one CREATE INDEX statement to its own .sql file.
# Globals:   BASE_DIR (read) - root of the output tree
# Arguments: $1 - the complete statement, already joined onto one line
# Outputs:   "Created: <file>" on stdout; warnings and errors on stderr
# Returns:   0 when the file was written or the statement was skipped,
#            1 when the output file could not be written
#
# The target is $BASE_DIR/indexes/<type>/<table>_<index>.sql, where the type
# comes from get_index_type and the file name from clean_filename.
process_index() {
    local line=$1
    # Declared separately from the assignments, and all local, so nothing
    # leaks into the caller's scope.
    local table_name index_name index_type
    local clean_name output_dir output_file

    table_name=$(extract_table_name "$line")
    index_name=$(extract_index_name "$line")
    index_type=$(get_index_type "$line")

    # Skip if we couldn't extract table or index name
    if [[ -z "$table_name" || -z "$index_name" ]]; then
        echo "Warning: Could not extract table or index name from:" >&2
        printf '%s\n' "$line" >&2
        return 0
    fi

    # Clean filename and build the output path
    clean_name=$(clean_filename "${table_name}_${index_name}")
    output_dir="$BASE_DIR/indexes/$index_type"
    output_file="${output_dir}/${clean_name}.sql"

    # Trim whitespace and ensure semicolon at end
    line=$(printf '%s\n' "$line" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')
    if [[ "$line" != *\; ]]; then
        line="${line};"
    fi

    # Only report success when the write actually worked.
    if ! printf '%s\n' "$line" > "$output_file"; then
        echo "Error: Could not write $output_file" >&2
        return 1
    fi
    echo "Created: $output_file"
}

# Read SQL text from stdin, assemble each CREATE INDEX statement onto a single
# line and hand it to process_index.
# Inputs:  stdin - SQL script text, one line at a time
# Calls:   is_index_start (to detect the first line of a statement) and
#          process_index (once per completed statement)
#
# A statement starts on a line accepted by is_index_start and ends at the
# first of:
#   - a line containing only GO (any letter case),
#   - a line whose last non-blank character is ';',
#   - whitespace followed by GO at the end of the starting line,
#   - the start of the next CREATE INDEX statement,
#   - end of input.
# Continuation lines are joined with a single space. The GO separator and the
# terminating ';' are not passed on; process_index re-adds the ';'.
extract_indexes() {
    local line
    local current_statement=""
    local capturing_index=false
    local restore_case=false

    # Regexes live in variables so [[ =~ ]] treats them as patterns.
    local blank_re='^[[:space:]]*$'
    local comment_re='^[[:space:]]*--'
    local go_line_re='^[[:space:]]*GO[[:space:]]*$'
    # Whitespace is required before an inline GO so that names merely ending
    # in "go" (e.g. "Logo") do not end the statement.
    local go_suffix_re='[[:space:]]+GO[[:space:]]*$'
    local semicolon_re=';[[:space:]]*$'

    # Match GO case-insensitively, but only undo the option if we set it.
    if ! shopt -q nocasematch; then
        restore_case=true
        shopt -s nocasematch
    fi

    # Read the input line by line (the last line may lack a newline)
    while IFS= read -r line || [[ -n "$line" ]]; do
        # Skip blank lines (spaces, tabs, CR) and pure comment lines
        if [[ "$line" =~ $blank_re || "$line" =~ $comment_re ]]; then
            continue
        fi

        if is_index_start "$line"; then
            # A new statement implicitly ends the one being captured.
            if $capturing_index && [[ -n "$current_statement" ]]; then
                process_index "$current_statement"
            fi

            current_statement="$line"
            capturing_index=true

            # GO on the same line: drop it and finish the statement.
            if [[ "$line" =~ $go_suffix_re ]]; then
                process_index "${current_statement%"${BASH_REMATCH[0]}"}"
                current_statement=""
                capturing_index=false
                continue
            fi
        elif $capturing_index; then
            # A GO line ends the statement and is never appended to it, so
            # a lowercase "go" cannot leak into the saved SQL.
            if [[ "$line" =~ $go_line_re ]]; then
                process_index "$current_statement"
                current_statement=""
                capturing_index=false
                continue
            fi
            current_statement+=" $line"
        else
            # Not part of any index statement.
            continue
        fi

        # A trailing ';' completes the statement; without this, following
        # non-index statements would be glued onto it when no GO is present.
        if [[ "$current_statement" =~ $semicolon_re ]]; then
            process_index "${current_statement%"${BASH_REMATCH[0]}"}"
            current_statement=""
            capturing_index=false
        fi
    done

    # Process any remaining index statement
    if $capturing_index && [[ -n "$current_statement" ]]; then
        process_index "$current_statement"
    fi

    # Re-enable case sensitivity if it was on when we were called
    if $restore_case; then
        shopt -u nocasematch
    fi
}

# Strip everything from "--" to the end of each line, then extract the indexes
# from what is left.
# NOTE(review): the sed expression also cuts a "--" that appears inside a
# string literal or a bracketed name — confirm that is acceptable for the
# scripts this is run on.
echo "Extracting indexes..."
sed 's/--.*$//' "$SQL_FILE" | extract_indexes
pipeline_status=("${PIPESTATUS[@]}")
# Stop when sed could not read the input instead of reporting success.
if [[ "${pipeline_status[0]}" -ne 0 ]]; then
    echo "Error: could not read $SQL_FILE" >&2
    exit 1
fi

echo "Indexes have been saved in $BASE_DIR/indexes"

# Count and list files
printf '\nFile counts:\n'
for index_type in clustered nonclustered unique_clustered; do
    # Collect the files with a glob rather than parsing ls output. The -f
    # test also discards the literal pattern left when nothing matches.
    sql_files=()
    for file in "$BASE_DIR/indexes/$index_type"/*.sql; do
        if [[ -f "$file" ]]; then
            sql_files+=("$file")
        fi
    done
    count=${#sql_files[@]}
    echo "$index_type indexes: $count files"

    if (( count > 0 )); then
        printf '\nExtracted %s indexes:\n' "$index_type"
        for file in "${sql_files[@]}"; do
            echo "- ${file##*/}"
        done
        echo
    fi
done