From 5faabed662c7f355b3e9d435c20e9655b52797cc Mon Sep 17 00:00:00 2001
From: Martin Cech <marten@bx.psu.edu>
Date: Tue, 28 Apr 2026 12:19:18 +0200
Subject: [PATCH] deploy alphafind on qa1

---
 files/galaxy/tools/alphafind.xml              | 220 +++++
 files/galaxy/tools/alphafind_search.py        | 899 ++++++++++++++++++
 group_vars/galaxyservers.yml                  |   2 +
 .../vars.yml                                  |   3 +-
 .../config/local_tool_conf.xml.j2             |   1 +
 templates/galaxy/config/tpv_rules_meta.yml.j2 |   4 +-
 6 files changed, 1126 insertions(+), 3 deletions(-)
 create mode 100644 files/galaxy/tools/alphafind.xml
 create mode 100755 files/galaxy/tools/alphafind_search.py
diff --git a/files/galaxy/tools/alphafind.xml b/files/galaxy/tools/alphafind.xml
new file mode 100644
index 00000000..3d504b88
--- /dev/null
+++ b/files/galaxy/tools/alphafind.xml
@@ -0,0 +1,220 @@
+<tool id="alphafind_search" name="AlphaFind Protein Search" version="1.0.0">
+    <description>Search AlphaFind API for structurally similar proteins</description>
+    <requirements><!-- alphafind_search.py needs Python >= 3.9 (dataclasses, tuple[...] annotations) -->
+        <requirement type="package" version="3.9">python</requirement>
+        <requirement type="package" version="2.31.0">requests</requirement>
+    </requirements>
+    <version_command><![CDATA[
+python3 '$__tool_directory__/alphafind_search.py' --version 2>/dev/null || echo "1.0.0"
+    ]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        python3 '$__tool_directory__/alphafind_search.py'
+            --query '$query'
+            #for $idx in $index
+                --index '$idx'
+            #end for
+            #if $filtering.organism
+                --filter-organism '$filtering.organism'
+            #end if
+            #if $filtering.tax_id
+                --filter-tax-id $filtering.tax_id
+            #end if
+            #if $filtering.gene_name
+                --filter-gene-name '$filtering.gene_name'
+            #end if
+            #if $filtering.cath_annotation
+                --filter-cath-annotation '$filtering.cath_annotation'
+            #end if
+            #if $searching.k
+                --option-k $searching.k
+            #end if
+            --timeout ${searching.timeout}
+            --sort-by ${sorting.sort_by}
+            --sort-order ${sorting.sort_order}
+            --quiet
+            --output '$output'
+    ]]></command>
+    <inputs>
+        <!-- Required: Query protein ID -->
+        <param name="query" type="text" optional="false" label="Protein Query" help="Enter a UniProt protein ID (e.g., P0A6F5, Q8Y547, Q9SBL1)"/>
+        <!-- Required: Index type selection -->
+        <param name="index" type="select" label="Search Index" multiple="true" help="Select one or more index types to search. Defaults to all.">
+            <option value="chains">Chains</option>
+            <option value="chains_90">Chains 90% identity</option>
+            <option value="chains_80">Chains 80% identity</option>
+            <option value="chains_70">Chains 70% identity</option>
+            <option value="domains">Domains</option>
+        </param>
+        <!-- Optional Filters Section -->
+        <section name="filtering" title="Filtering Options" expanded="false">
+            <param name="organism" type="text" label="Organism Name" help="Filter by organism name (e.g., 'Mycobacterium tuberculosis')" optional="true"/>
+            <param name="tax_id" type="integer" label="Taxonomy ID" help="NCBI Taxonomy ID (numeric)" optional="true"/>
+            <param name="gene_name" type="text" label="Gene Name" help="Filter by gene name" optional="true"/>
+            <param name="cath_annotation" type="text" label="CATH Annotation" help="Filter by CATH annotation (only applied when using 'domains' index)" optional="true"/>
+        </section>
+        <!-- Search Options Section -->
+        <section name="searching" title="Search Options" expanded="false">
+            <param name="k" type="integer" label="Number of Similar Proteins" value="10" min="1" max="5000" help="Maximum number of similar proteins to return (k). Note: API can return up to 5000 results."/>
+            <param name="timeout" type="integer" label="Timeout (seconds)" value="600" min="60" max="3600" help="Maximum time to wait for API to complete the search and computations."/>
+        </section>
+        <!-- Sorting Options -->
+        <section name="sorting" title="Sorting Options" expanded="false">
+            <param name="sort_by" type="select" label="Sort Results By" help="Choose how to order the results">
+                <option value="knn">KNN Similarity Score (default)</option>
+                <option value="tm_score">TM-Score</option>
+            </param>
+            <param name="sort_order" type="select" label="Sort Order" help="Choose ascending or descending order">
+                <option value="desc">Descending (highest first)</option>
+                <option value="asc">Ascending (lowest first)</option>
+            </param>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="output" format="csv" label="${tool.name} on ${query}"/>
+    </outputs>
+    <tests>
+        <!-- Test basic search with known protein (expected files are resolved relative to test-data/) -->
+        <test>
+            <param name="query" value="P9WGR1"/>
+            <param name="index" value="chains"/>
+            <section name="filtering">
+                <param name="organism" value="Mycobacterium tuberculosis"/>
+            </section>
+            <output name="output" file="P9WGR1_basic.csv"/>
+        </test>
+        <!-- Test with TM-score sorting -->
+        <test>
+            <param name="query" value="Q9SBL1"/>
+            <param name="index" value="chains"/>
+            <section name="filtering">
+                <param name="organism" value="Mycobacterium tuberculosis"/>
+            </section>
+            <section name="sorting">
+                <param name="sort_by" value="tm_score"/>
+                <param name="sort_order" value="desc"/>
+            </section>
+            <output name="output" file="Q9SBL1_sorted.csv"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**AlphaFind Protein Search**
+
+This tool searches the AlphaFind API for structurally similar proteins based on 3D structural similarity.
+
+-----
+
+**What is AlphaFind?**
+
+AlphaFind is a service for searching protein structures using AlphaFold predictions. It uses structural embeddings to find proteins with similar 3D conformations.
+
+-----
+
+**Input Parameters**
+
+* **Protein Query**: A UniProt protein ID (e.g., P0A6F5, Q8Y547, Q9SBL1)
+
+* **Search Index**: Choose which structural databases to search:
+  - *Chains*: Full protein chains (recommended)
+  - *Chains 90%*: Chains filtered to 90% sequence identity
+  - *Chains 80%*: Chains filtered to 80% sequence identity
+  - *Chains 70%*: Chains filtered to 70% sequence identity
+  - *Domains*: Protein domains (independent structural units)
+
+* **Optional Filters**: Narrow your search results
+  - *Organism Name*: Filter by organism (e.g., 'Mycobacterium tuberculosis')
+  - *Taxonomy ID*: Filter by NCBI taxonomy ID (numeric)
+  - *Gene Name*: Filter by gene symbol
+  - *CATH Annotation*: Filter by CATH structural classification (domains only)
+
+* **Search Options**:
+  - *Number of Similar Proteins*: Control result size (1-5000). Note that even with low k, the API may return all available matches up to 5000.
+  - *Timeout*: Maximum wait time for computations (60-3600 seconds). Complex searches may take several minutes.
+
+* **Sorting**:
+  - *KNN Similarity*: Sort by embedding similarity score
+  - *TM-Score*: Sort by structural alignment TM-score
+
+-----
+
+**Output Format**
+
+The tool produces a CSV file with 22 columns:
+
+- query_id: Unique query identifier
+- index_type: Index type used
+- page_number: Page number (internal)
+- target_id: Target protein ID
+- score: KNN similarity score (0-1)
+- organism: Target protein organism
+- tax_id: Taxonomy ID
+- gene_name: Gene symbol
+- protein_name: Protein name
+- avg_plddt: AlphaFold prediction quality
+- tm_score_query: TM-score (query as reference)
+- tm_score_target: TM-score (target as reference)
+- rmsd: Root-mean-square deviation
+- sequential_identity: Sequence alignment identity
+- aligned_residues: Aligned residue ratio
+- status: Computation status
+- created_at: Computation start time
+- completed_at: Computation end time
+- has_experimental_structure: Experimental structure available
+- pdb_ids: PDB identifiers
+- chopping: Domain boundaries (if domains)
+- tar_index: Internal reference
+
+-----
+
+**Important Notes**
+
+* This is a web-based API. The tool requires internet connectivity to https://alphafind.ics.muni.cz
+* Computations may take from seconds to several minutes depending on query complexity
+* Results are cached by the API, so repeated queries may complete faster
+* The API can return up to 5000 results even with a low k value
+* TM-score > 0.5 indicates significant structural similarity
+* TM-score > 0.8 indicates very high structural similarity
+
+-----
+
+**Examples**
+
+1. **Basic search**: Query protein P0A6F5 with default settings
+
+2. **Find similar proteins in specific organism**:
+
+   - Query: Q8Y547
+   - Organism: Mycobacterium tuberculosis
+
+3. **Domain-level search**:
+
+   - Query: F8U1Q0
+   - Index: domains
+   - CATH annotation: 1.10.8.10
+
+4. **High-confidence results**:
+
+   - Query: P9WGR1
+   - Sort by: TM-Score
+   - Sort order: Descending
+
+-----
+
+**References**
+
+* AlphaFind API: https://alphafind.ics.muni.cz
+* AlphaFold DB: https://alphafold.ebi.ac.uk
+* TM-Score paper: Zhang & Skolnick, 2004
+
+-----
+
+**Troubleshooting**
+
+* **"Protein not found"**: The query ID may not exist in AlphaFold DB or lacks embedding vectors. Verify the UniProt ID.
+* **Timeout exceeded**: Increase the timeout value in Search Options.
+* **Empty results**: Your filters may be too restrictive, or no similar proteins exist with the specified criteria.
+* **"Search failed"**: The API encountered an error. Try again or check the API status.
+    ]]></help>
+    <citations><!-- AlphaFind paper (Nucleic Acids Research, 2024) -->
+        <citation type="doi">10.1093/nar/gkae397</citation>
+    </citations>
+</tool>
diff --git a/files/galaxy/tools/alphafind_search.py b/files/galaxy/tools/alphafind_search.py
new file mode 100755
index 00000000..487e24a6
--- /dev/null
+++ b/files/galaxy/tools/alphafind_search.py
@@ -0,0 +1,899 @@
+#!/usr/bin/env python3
+"""
+AlphaFind API Client
+
+Search AlphaFold protein structures for structural similarity via the AlphaFind API.
+
+Usage:
+    python3 alphafind_search.py --query P0A6F5 --output results.csv
+
+Search Features:
+    - Query by UniProt protein ID
+    - Multiple index types: chains, chains_90, chains_80, chains_70, domains
+    - Filters: organism, taxonomy ID, gene name, CATH annotation (domains)
+    - Pagination and sorting support
+    - Asynchronous computation with progress tracking
+
+Output:
+    CSV file with structural similarity results including TM-scores and metadata
+
+API Limitations:
+    - Maximum 5,000 results per query (hard limit)
+    - Page size limited to 100 results by the API
+    - TM-score calculations can take time for large result sets
+
+Common Use Cases:
+    1. Basic search for similar structures: --query P0A6F5
+    2. Filter by organism: --filters '{"organism": "Mycobacterium tuberculosis"}'
+    3. Sort by TM-score: --sort-by tm_score --sort-order desc
+    4. Get more results: --options '{"size": 5000}' (max API limit)
+
+For queries with >5000 results, split by organism or tax_id and combine.
+
+Version: 1.0.0
+"""
+
+__version__ = "1.0.0"
+
+import argparse
+import csv
+import json
+import logging
+import sys
+import time
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+import requests
+
+# ============================================================================
+# Constants
+# ============================================================================
+
+# Exit codes
+EXIT_SUCCESS = 0
+EXIT_SEARCH_FAILED = 1
+EXIT_TIMEOUT = 2
+EXIT_HTTP_ERROR = 3
+EXIT_INTERRUPTED = 4
+EXIT_UNEXPECTED_ERROR = 5
+
+# API defaults
+DEFAULT_BASE_URL = "https://alphafind.ics.muni.cz"
+DEFAULT_PAGE_SIZE = 100
+DEFAULT_TIMEOUT = 600
+DEFAULT_POLL_INTERVAL = 5
+
+# Pagination: API max is 100 per page
+MAX_PAGE_SIZE = 100
+
+# Result limits
+MAX_RESULTS_PER_QUERY = 5000
+
+# Sort options
+DEFAULT_SORT_BY = 'knn'
+DEFAULT_SORT_ORDER = 'desc'
+VALID_SORT_BY = ['knn', 'tm_score']
+VALID_SORT_ORDER = ['asc', 'desc']
+
+# Index types
+VALID_INDEXES = ['chains', 'chains_90', 'chains_80', 'chains_70', 'domains']
+
+# CSV output columns
+CSV_COLUMNS = [
+    'query_id', 'index_type', 'page_number',
+    'target_id', 'score', 'organism', 'tax_id',
+    'gene_name', 'protein_name', 'avg_plddt',
+    'tm_score_query', 'tm_score_target', 'rmsd',
+    'sequential_identity', 'aligned_residues',
+    'status', 'created_at', 'completed_at',
+    'has_experimental_structure', 'pdb_ids',
+    'chopping', 'tar_index'
+]
+
+# Status constants
+STATUS_PENDING = 'pending'
+STATUS_SCORING = 'scoring'
+STATUS_COMPLETED = 'completed'
+STATUS_FAILED = 'failed'
+STATUS_FINAL = (STATUS_COMPLETED, STATUS_FAILED)
+
+
+# ============================================================================
+# Data Classes
+# ============================================================================
+
+@dataclass
+class SearchConfig:
+    """Settings for one search run, as assembled by parse_arguments()."""
+    query: str  # value of --query
+    index: Optional[List[str]]  # None = use server default
+    filters: Dict[str, Any]  # --filters JSON merged with the --filter-* flags
+    options: Dict[str, Any]  # --options JSON merged with --option-k
+    output_file: str  # CSV path from --output
+    base_url: str  # API root from --base-url
+    poll_interval: int  # seconds, from --poll-interval
+    timeout: int  # seconds, from --timeout
+    page_size: int  # results per page, from --page-size
+    sort_by: str = DEFAULT_SORT_BY
+    sort_order: str = DEFAULT_SORT_ORDER
+    verbose: bool = False
+    dry_run: bool = False
+    quiet: bool = False
+
+
+# ============================================================================
+# API Client
+# ============================================================================
+
+class AlphaFindClient:
+    """Client for interacting with the AlphaFind API."""
+
+    def __init__(self, base_url: str, timeout: int = 30):
+        """Initialize the AlphaFind client.
+
+        Args:
+            base_url: Base URL of the AlphaFind API (trailing '/' characters are removed)
+            timeout: Request timeout in seconds, stored and passed to the session's HTTP calls
+        """
+        self.base_url = base_url.rstrip('/')
+        self.timeout = timeout
+        self.session = requests.Session()  # one session shared by all requests of this client
+        self.session.headers.update({'Content-Type': 'application/json'})
+
+    def health_check(self) -> bool:
+        """Probe the ``/api/health`` endpoint.
+
+        Returns:
+            True when the GET completes without an HTTP error status,
+            False (after logging a warning) on any requests.RequestException
+        """
+        health_url = f"{self.base_url}/api/health"
+        try:
+            reply = self.session.get(health_url, timeout=self.timeout)
+            reply.raise_for_status()
+        except requests.RequestException as exc:
+            # Connection problems and 4xx/5xx replies both count as unhealthy.
+            logging.warning(f"Health check failed: {exc}")
+            return False
+        return True
+
+    def submit_search(
+        self,
+        query: str,
+        index: Optional[List[str]],
+        filters: Dict[str, Any],
+        options: Dict[str, Any]
+    ) -> tuple[str, str, str]:  # builtin generic is evaluated at def time: needs Python >= 3.9
+        """Submit a search query to the API.
+
+        Args:
+            query: Protein ID to search for
+            index: Index types to search (None or empty: key is left out of the payload)
+            filters: Filter criteria
+            options: Search options
+
+        Returns:
+            Tuple of (query_id, index_type, status), read from the JSON response
+
+        Raises:
+            requests.HTTPError: If the API request fails (raise_for_status on the POST)
+        """
+        payload = {
+            "query": query,
+            "filters": filters,
+            "options": options
+        }
+
+        # Only include index if specified
+        if index:
+            payload["index"] = index
+
+        logging.info(f"Submitting search for query: {query}")
+        logging.info(f"API URL: {self.base_url}/api/search")
+        logging.info(f"API Payload: {json.dumps(payload)}")
+
+        response = self.session.post(
+            f"{self.base_url}/api/search",
+            json=payload,
+            timeout=self.timeout
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        query_id = data['id']
+        index_type = data['index_type'][0]  # NOTE(review): only the first returned index type is kept
+        status = data['status']
+
+        logging.info(f"Search submitted: query_id={query_id}, "
+                    f"index_type={index_type}, status={status}")
+        logging.debug(f"Response: {json.dumps(data, indent=2)}")
+
+        return query_id, index_type, status
+
+    def get_results(
+        self,
+        query_id: str,
+        index_type: str,
+        page: int = 1,
+        page_size: int = DEFAULT_PAGE_SIZE,
+        sort_by: str = DEFAULT_SORT_BY,
+        sort_order: str = DEFAULT_SORT_ORDER
+    ) -> Dict[str, Any]:
+        """Retrieve one page of search results.
+
+        Args:
+            query_id: Query ID from search submission
+            index_type: Index type
+            page: Page number (1-indexed)
+            page_size: Number of results per page (max 100)
+            sort_by: Sort field ('knn' or 'tm_score')
+            sort_order: Sort order ('desc' or 'asc')
+
+        Returns:
+            Decoded JSON body of the response
+
+        Raises:
+            requests.HTTPError: On an error status; RuntimeError: on a 404 reporting failure or with a non-JSON body
+        """
+        params = {
+            "page": page,
+            "page_size": page_size,
+            "sort_by": sort_by,
+            "sort_order": sort_order
+        }
+
+        logging.debug(f"Fetching results: page={page}, page_size={page_size}")
+
+        response = self.session.get(
+            f"{self.base_url}/api/search/{query_id}/{index_type}/results",
+            params=params,
+            timeout=self.timeout
+        )
+
+        # Handle 404 responses that contain error messages (reported on stderr, then raised)
+        if response.status_code == 404:
+            try:
+                error_data = response.json()
+                if error_data.get('status') == STATUS_FAILED:
+                    error_message = error_data.get('message', 'Search query not found')
+                    sys.stderr.write(f"ERROR: {error_message}\n")
+                    sys.stderr.flush()
+                    raise RuntimeError(f"Search failed: {error_message}")  # not caught by the except below
+            except (ValueError, KeyError):
+                # If response is not JSON or doesn't have expected structure
+                sys.stderr.write(f"ERROR: Search query not found for query_id: {query_id}, index_type: {index_type}\n")
+                sys.stderr.flush()
+                raise RuntimeError(f"Search query not found: query_id={query_id}, index_type={index_type}")
+
+        response.raise_for_status()  # also reached by a 404 whose JSON status is not 'failed'
+
+        return response.json()
+
+    def wait_for_completion(
+        self,
+        query_id: str,
+        index_type: str,
+        poll_interval: int = DEFAULT_POLL_INTERVAL,
+        timeout: int = DEFAULT_TIMEOUT,
+        show_progress: bool = True
+    ) -> str:
+        """Poll the API until the search is completed or failed.
+
+        Args:
+            query_id: Query ID to monitor
+            index_type: Index type
+            poll_interval: Seconds between polls
+            timeout: Maximum time to wait in seconds
+            show_progress: Whether to write a one-line status indicator to stdout
+
+        Returns:
+            Final status ('completed' or 'failed')
+
+        Raises:
+            TimeoutError: If timeout is exceeded (errors other than RequestException propagate)
+        """
+        # Monotonic clock: elapsed time is immune to wall-clock adjustments.
+        start_time = time.monotonic()
+        status = STATUS_PENDING
+
+        logging.info(f"Waiting for completion (timeout={timeout}s, poll_interval={poll_interval}s)")
+
+        while time.monotonic() - start_time < timeout:
+            try:
+                response = self.get_results(query_id, index_type, page=1, page_size=1)
+                status = response.get('status', STATUS_PENDING)
+                total_results = response.get('total_results', 0)
+
+                if show_progress:
+                    sys.stdout.write(f"\r  Status: {status} | "
+                                    f"Elapsed: {int(time.monotonic() - start_time)}s | "
+                                    f"Results: {total_results}")
+                    sys.stdout.flush()
+
+                if status in STATUS_FINAL:
+                    if show_progress:
+                        sys.stdout.write('\n')
+                    logging.info(f"Search finished with status: {status}")
+                    return status
+
+                time.sleep(poll_interval)
+
+            except requests.RequestException as e:
+                logging.warning(f"Polling error (will retry): {e}")
+                time.sleep(poll_interval)
+
+        if show_progress:
+            sys.stdout.write('\n')
+        raise TimeoutError(
+            f"Search did not complete within {timeout} seconds. "
+            f"Last status: {status}"
+        )
+
+    def get_all_results(
+        self,
+        query_id: str,
+        index_type: str,
+        page_size: int = DEFAULT_PAGE_SIZE,
+        sort_by: str = DEFAULT_SORT_BY,
+        sort_order: str = DEFAULT_SORT_ORDER,
+        show_progress: bool = True
+    ) -> List[Dict[str, Any]]:
+        """Fetch every result page in order and concatenate the 'results' lists.
+
+        Args:
+            query_id: Query ID
+            index_type: Index type
+            page_size: Number of results per page
+            sort_by: Sort field
+            sort_order: Sort order
+            show_progress: Whether to write a page counter to stdout
+
+        Returns:
+            List of result dictionaries; shorter than the full set if a page request failed
+        """
+        all_results: List[Dict[str, Any]] = []
+        page = 1
+        total_pages = 1
+
+        logging.info(f"Fetching all results with page_size={page_size}")
+
+        while page <= total_pages:
+            try:
+                response = self.get_results(
+                    query_id, index_type,
+                    page=page,
+                    page_size=page_size,
+                    sort_by=sort_by,
+                    sort_order=sort_order
+                )
+
+                results = response.get('results', [])
+                all_results.extend(results)
+
+                if page == 1:  # the page count is taken from the first response only
+                    total_pages = response.get('total_pages', 1)
+
+                if show_progress:
+                    sys.stdout.write(f"\r  Fetching page {page}/{total_pages}")
+                    sys.stdout.flush()
+
+                page += 1
+
+            except requests.RequestException as e:
+                logging.error(f"Error fetching page {page}: {e}")
+                break  # NOTE(review): pages fetched so far are returned; the caller cannot tell they are partial
+
+        if show_progress:
+            sys.stdout.write('\n')
+        logging.info(f"Fetched {len(all_results)} results")
+
+        return all_results
+
+
+# ============================================================================
+# Data Processing
+# ============================================================================
+
+def flatten_result(
+    result: Dict[str, Any],
+    query_id: str,
+    index_type: str,
+    page: int
+) -> Dict[str, Any]:
+    """Flatten one nested API result into a single-level row for CSV export.
+
+    Args:
+        result: Raw result from API
+        query_id: Query ID for this search
+        index_type: Index type for this search
+        page: Page number this result came from
+
+    Returns:
+        Flat dictionary; 'tar_index' is present only when the result carries it
+    """
+    flattened = {
+        'query_id': query_id,
+        'index_type': index_type,
+        'page_number': page,
+        'target_id': result.get('target_id'),
+        'score': result.get('score'),
+        'organism': result.get('organism'),
+        'tax_id': result.get('tax_id'),
+        'gene_name': result.get('gene_name'),
+        'protein_name': result.get('protein_name'),
+        'avg_plddt': result.get('avg_plddt'),
+        'tm_score_query': result.get('tm_score_query'),
+        'tm_score_target': result.get('tm_score_target'),
+        'rmsd': result.get('rmsd'),
+        'sequential_identity': result.get('sequential_identity'),
+        'aligned_residues': result.get('aligned_residues'),
+    }
+
+    # Flatten metadata (an explicit null is treated like a missing object)
+    metadata = result.get('metadata') or {}
+    flattened['status'] = metadata.get('status')
+    flattened['created_at'] = metadata.get('created_at')
+    flattened['completed_at'] = metadata.get('completed_at')
+
+    # Flatten experimental_structure (can be None)
+    exp_structure = result.get('experimental_structure')
+    if exp_structure:
+        flattened['has_experimental_structure'] = exp_structure.get('has_experimental_structure')
+        pdb_ids = exp_structure.get('pdb_ids')
+        flattened['pdb_ids'] = ';'.join(map(str, pdb_ids)) if pdb_ids else ''
+    else:
+        flattened['has_experimental_structure'] = None
+        flattened['pdb_ids'] = ''
+
+    # Handle optional fields (tar_index is stringified; the key is omitted when absent)
+    if 'tar_index' in result:
+        flattened['tar_index'] = str(result['tar_index'])
+
+    flattened['chopping'] = result.get('chopping', '')
+
+    return flattened
+
+
+def results_to_csv(
+    results: List[Dict[str, Any]],
+    query_id: str,
+    index_type: str,
+    filename: str,
+    page_size: int
+) -> int:
+    """Write flattened results to a CSV file (the header row is always written).
+
+    Args:
+        results: List of raw result dictionaries (may be empty)
+        query_id: Query ID for this search
+        index_type: Index type for this search
+        filename: Output CSV filename
+        page_size: Page size used (for page number assignment)
+
+    Returns:
+        Number of result rows written (0 when there are no results)
+
+    Raises:
+        OSError: If the file cannot be written (logged, then re-raised)
+    """
+    if not results:
+        # Still emit a header-only file so the output is a valid CSV.
+        logging.warning("No results to write to CSV")
+
+    # Row i came from page (i // page_size) + 1 (pages are 1-indexed).
+    flattened_results = [
+        flatten_result(result, query_id, index_type, (i // page_size) + 1)
+        for i, result in enumerate(results or [])
+    ]
+
+    # Columns missing from a row are left blank; extra keys are ignored.
+    try:
+        with open(filename, 'w', newline='', encoding='utf-8') as f:
+            writer = csv.DictWriter(f, fieldnames=CSV_COLUMNS, extrasaction='ignore')
+            writer.writeheader()
+            writer.writerows(flattened_results)
+        logging.info(f"Wrote {len(flattened_results)} results to {filename}")
+        return len(flattened_results)
+
+    except IOError as e:
+        logging.error(f"Error writing CSV file: {e}")
+        raise
+
+
+# ============================================================================
+# Argument Parsing
+# ============================================================================
+
+def parse_json_args(arg_string: str) -> Dict[str, Any]:
+    """Parse a JSON-object command-line argument (used as an argparse ``type=``).
+
+    Args:
+        arg_string: JSON text; an empty string yields an empty dict
+
+    Returns:
+        Parsed dictionary
+
+    Raises:
+        argparse.ArgumentTypeError: If the text is not valid JSON or not a JSON object
+    """
+    try:
+        parsed = json.loads(arg_string) if arg_string else {}
+    except json.JSONDecodeError as e:
+        raise argparse.ArgumentTypeError(f"Invalid JSON: {e}")
+    if not isinstance(parsed, dict):  # lists/numbers would break the later dict() merge
+        raise argparse.ArgumentTypeError(f"Expected a JSON object, got {type(parsed).__name__}")
+    return parsed
+
+
+def parse_arguments() -> SearchConfig:
+    """Parse sys.argv into a SearchConfig (prints the version and exits on --version).
+
+    Returns:
+        SearchConfig; the --filter-* / --option-k flags override same-named --filters / --options keys
+    """
+    parser = argparse.ArgumentParser(
+        description='AlphaFind API Client - Search for protein structures',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Basic search
+  python3 alphafind_search.py --query P0A6F5
+
+  # Search with organism filter
+  python3 alphafind_search.py --query Q8Y547 --filters '{"organism": "Mycobacterium tuberculosis"}'
+
+  # Search with sorting
+  python3 alphafind_search.py --query Q9SBL1 --sort-by tm_score --sort-order desc
+
+  # Request maximum results (API limit is 5000 per query)
+  python3 alphafind_search.py --query P69905 --options '{"size": 5000}' --timeout 1800
+        """
+    )
+
+    # Required: query
+    parser.add_argument(
+        '--query',
+        help='UniProt protein ID to search (e.g., "P0A6F5")'
+    )
+
+    # Optional: index selection
+    parser.add_argument(
+        '--index',
+        nargs='+', action='extend',  # the Galaxy wrapper repeats --index per value; keep them all
+        help=f'Index types to search (uses server default if not specified). '
+             f'Valid: {", ".join(VALID_INDEXES)}'
+    )
+
+    # Optional: filters
+    parser.add_argument(
+        '--filters',
+        type=parse_json_args,
+        default={},
+        help='Filter criteria as JSON string (e.g., \'{"organism": "Mycobacterium tuberculosis"}\')'
+    )
+
+    # Optional: search options
+    parser.add_argument(
+        '--options',
+        type=parse_json_args,
+        default={},
+        help='Search options as JSON string (e.g., \'{"size": 5000}\')'
+    )
+
+    # Optional: individual filters (for Galaxy compatibility)
+    parser.add_argument(
+        '--filter-organism',
+        help='Filter by organism name (e.g., "Mycobacterium tuberculosis")'
+    )
+    parser.add_argument(
+        '--filter-tax-id',
+        type=int,
+        help='Filter by NCBI Taxonomy ID (numeric)'
+    )
+    parser.add_argument(
+        '--filter-gene-name',
+        help='Filter by gene name'
+    )
+    parser.add_argument(
+        '--filter-cath-annotation',
+        help='Filter by CATH annotation (only for domains index)'
+    )
+
+    # Optional: individual options (for Galaxy compatibility)
+    parser.add_argument(
+        '--option-k',
+        type=int,
+        help='Number of similar proteins to return (k parameter)'
+    )
+
+    # Optional: output
+    parser.add_argument(
+        '--output',
+        default='results.csv',
+        help='Output CSV filename (default: results.csv)'
+    )
+
+    # Optional: API configuration
+    parser.add_argument(
+        '--base-url',
+        default=DEFAULT_BASE_URL,
+        help=f'AlphaFind API base URL (default: {DEFAULT_BASE_URL})'
+    )
+
+    # Optional: timeouts and polling
+    parser.add_argument(
+        '--poll-interval',
+        type=int,
+        default=DEFAULT_POLL_INTERVAL,
+        help=f'Polling interval in seconds (default: {DEFAULT_POLL_INTERVAL})'
+    )
+
+    parser.add_argument(
+        '--timeout',
+        type=int,
+        default=DEFAULT_TIMEOUT,
+        help=f'Maximum wait time in seconds (default: {DEFAULT_TIMEOUT})'
+    )
+
+    # Optional: pagination
+    parser.add_argument(
+        '--page-size',
+        type=int,
+        default=DEFAULT_PAGE_SIZE,
+        choices=range(1, MAX_PAGE_SIZE + 1),
+        metavar=f'[1-{MAX_PAGE_SIZE}]',
+        help=f'Results per page (default: {DEFAULT_PAGE_SIZE}, max: {MAX_PAGE_SIZE})'
+    )
+
+    # Optional: sorting
+    parser.add_argument(
+        '--sort-by',
+        choices=VALID_SORT_BY,
+        default=DEFAULT_SORT_BY,
+        help=f'Sort results by (default: {DEFAULT_SORT_BY})'
+    )
+
+    parser.add_argument(
+        '--sort-order',
+        choices=VALID_SORT_ORDER,
+        default=DEFAULT_SORT_ORDER,
+        help=f'Sort order (default: {DEFAULT_SORT_ORDER})'
+    )
+
+    # Mode flags
+    parser.add_argument('-v', '--verbose', action='store_true',
+                       help='Enable verbose logging')
+    parser.add_argument('-q', '--quiet', action='store_true',
+                       help='Suppress informational output (recommended for Galaxy automation)')
+    parser.add_argument('--dry-run', action='store_true',
+                       help='Show what would be done without executing')
+    parser.add_argument('--version', action='store_true',
+                       help='Show version and exit')
+
+    args = parser.parse_args()
+
+    # Handle version flag
+    if args.version:
+        print(__version__)
+        sys.exit(EXIT_SUCCESS)
+
+    # Validate required arguments (checked here, not by argparse, so --version and --dry-run work without --query)
+    if not args.query and not args.dry_run:
+        parser.error('--query is required')
+
+    # Build filters from both JSON and individual params (copy so the argparse default dict is never mutated)
+    filters = dict(args.filters)
+    if args.filter_organism:
+        filters['organism'] = args.filter_organism
+    if args.filter_tax_id:
+        filters['tax_id'] = args.filter_tax_id
+    if args.filter_gene_name:
+        filters['gene_name'] = args.filter_gene_name
+    if args.filter_cath_annotation:
+        filters['cath_annotation'] = args.filter_cath_annotation
+
+    # Build options from both JSON and individual params
+    options = dict(args.options)
+    if args.option_k:
+        options['k'] = args.option_k
+
+    return SearchConfig(
+        query=args.query,
+        index=args.index,
+        filters=filters,
+        options=options,
+        output_file=args.output,
+        base_url=args.base_url,
+        poll_interval=args.poll_interval,
+        timeout=args.timeout,
+        page_size=args.page_size,
+        sort_by=args.sort_by,
+        sort_order=args.sort_order,
+        verbose=args.verbose,
+        dry_run=args.dry_run,
+        quiet=args.quiet
+    )
+
+
+# ============================================================================
+# Logging
+# ============================================================================
+
+def setup_logging(verbose: bool, quiet: bool = False) -> None:
+    """Initialise root logging for this run.
+
+    Args:
+        verbose: Log at DEBUG instead of INFO (has no effect when quiet)
+        quiet: Log only ERROR and above, message text only, on stderr
+    """
+    if not quiet:
+        # Normal mode: timestamped records at INFO, or DEBUG when verbose.
+        if verbose:
+            chosen_level = logging.DEBUG
+        else:
+            chosen_level = logging.INFO
+        logging.basicConfig(
+            level=chosen_level,
+            format='%(asctime)s - %(levelname)s - %(message)s',
+            datefmt='%Y-%m-%d %H:%M:%S'
+        )
+        return
+    logging.basicConfig(level=logging.ERROR, format='%(message)s', stream=sys.stderr)
+
+
+def error_exit(message: str, exit_code: int = 1) -> None:
+    """Report a fatal error on stderr and terminate the process.
+
+    Args:
+        message: Text written after the "ERROR: " prefix
+        exit_code: Status passed to sys.exit
+    """
+    # print(..., flush=True) writes the line and flushes stderr in one call.
+    print(f"ERROR: {message}", file=sys.stderr, flush=True)
+    sys.exit(exit_code)
+
+
+# ============================================================================
+# Main Application
+# ============================================================================
+
+def run_search(client: AlphaFindClient, config: SearchConfig) -> int:
+    """Submit a search, wait for it to finish, and save the results as CSV.
+
+    Args:
+        client: AlphaFind API client used to submit, poll and fetch results
+        config: Search configuration (query, filters, paging, output file)
+
+    Returns:
+        EXIT_SUCCESS (also when there are no results) or EXIT_SEARCH_FAILED
+    """
+    show_progress = not config.quiet
+
+    # Submit the search; the client returns query id, index type and status
+    query_id, index_type, status = client.submit_search(
+        query=config.query,
+        index=config.index,
+        filters=config.filters,
+        options=config.options
+    )
+
+    # Poll only when the submit call did not already report completion
+    if status != STATUS_COMPLETED:
+        if not config.quiet:
+            logging.info("Waiting for search to complete...")
+        status = client.wait_for_completion(
+            query_id=query_id,
+            index_type=index_type,
+            poll_interval=config.poll_interval,
+            timeout=config.timeout,
+            show_progress=show_progress
+        )
+
+    # Failed search: quiet mode exits via error_exit, otherwise return the code
+    if status == STATUS_FAILED:
+        error_msg = "Search failed. No results to retrieve."
+        logging.error(error_msg)
+        if config.quiet:
+            error_exit(error_msg, EXIT_SEARCH_FAILED)
+        return EXIT_SEARCH_FAILED
+
+    # Retrieve the results with the configured page size and sort order
+    results = client.get_all_results(
+        query_id=query_id,
+        index_type=index_type,
+        page_size=config.page_size,
+        sort_by=config.sort_by,
+        sort_order=config.sort_order,
+        show_progress=show_progress
+    )
+
+    # No results: skip the CSV step below and still return success
+    if not results:
+        if not config.quiet:
+            logging.warning("No results found")
+        return EXIT_SUCCESS
+
+    count = results_to_csv(
+        results=results,
+        query_id=query_id,
+        index_type=index_type,
+        filename=config.output_file,
+        page_size=config.page_size
+    )
+
+    if not config.quiet:
+        logging.info(f"Successfully completed. {count} results saved to {config.output_file}")
+
+    return EXIT_SUCCESS
+
+
+def main() -> int:
+    """Parse arguments, configure logging and run the search (or a dry run).
+
+    Returns:
+        Exit code: EXIT_SUCCESS, or the EXIT_* code of the failure that occurred
+    """
+    config = parse_arguments()
+    setup_logging(config.verbose, config.quiet)
+
+    # Log configuration if not in quiet mode
+    if not config.quiet:
+        logging.info("=" * 60)
+        logging.info("AlphaFind API Client")
+        logging.info("=" * 60)
+        logging.info(f"Query: {config.query}")
+        logging.info(f"Index: {config.index or 'Server default'}")
+        logging.info(f"Filters: {config.filters}")
+        logging.info(f"Options: {config.options}")
+        logging.info(f"Output: {config.output_file}")
+        logging.info(f"Base URL: {config.base_url}")
+        logging.info(f"Timeout: {config.timeout}s, Poll interval: {config.poll_interval}s")
+        logging.info(f"Page size: {config.page_size}, Sort by: {config.sort_by} {config.sort_order}")
+        logging.info("=" * 60)
+
+    if config.dry_run:
+        logging.info("DRY RUN - No API calls will be made")
+        logging.info(f"Query: {config.query}")
+        logging.info(f"Results would be saved to: {config.output_file}")
+        return EXIT_SUCCESS
+
+    try:
+        client = AlphaFindClient(config.base_url)
+        # Health check (optional, just warns if fails)
+        if not client.health_check() and not config.quiet:
+            logging.warning("Health check failed. Continuing anyway...")
+
+        return run_search(client, config)
+
+    except TimeoutError as e:
+        error_msg = str(e)
+        logging.error(error_msg)
+        if config.quiet:
+            error_exit(error_msg, EXIT_TIMEOUT)
+        return EXIT_TIMEOUT
+
+    except requests.HTTPError as e:
+        # A Response is falsy for 4xx/5xx, so compare against None to keep the body.
+        response_text = e.response.text if e.response is not None else 'No response'
+        error_msg = f"HTTP error: {e}\nResponse: {response_text}"
+        logging.error(error_msg)
+        if config.quiet:
+            error_exit(error_msg, EXIT_HTTP_ERROR)
+        return EXIT_HTTP_ERROR
+
+    except KeyboardInterrupt:
+        if not config.quiet:
+            logging.info("\nInterrupted by user")
+        return EXIT_INTERRUPTED
+
+    except Exception as e:
+        error_msg = f"Unexpected error: {e}"
+        logging.error(error_msg, exc_info=config.verbose)
+        if config.quiet:
+            error_exit(error_msg, EXIT_UNEXPECTED_ERROR)
+        return EXIT_UNEXPECTED_ERROR
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/group_vars/galaxyservers.yml b/group_vars/galaxyservers.yml
index a8f3ff20..ae5c7fea 100644
--- a/group_vars/galaxyservers.yml
+++ b/group_vars/galaxyservers.yml
@@ -295,6 +295,8 @@ galaxy_local_tools:
   - testing.xml
   - testing_pbs.xml
   - testing_html.xml
+  - alphafind.xml
+  - alphafind_search.py
 
 galaxy_extra_dirs:
   - "{{ galaxy_mutable_data_dir }}"
diff --git a/host_vars/galaxy-qa1.galaxy.cloud.e-infra.cz/vars.yml b/host_vars/galaxy-qa1.galaxy.cloud.e-infra.cz/vars.yml
index 2e842eb6..cf3cac32 100644
--- a/host_vars/galaxy-qa1.galaxy.cloud.e-infra.cz/vars.yml
+++ b/host_vars/galaxy-qa1.galaxy.cloud.e-infra.cz/vars.yml
@@ -1,9 +1,8 @@
-galaxy_commit_id: release_25.1
 galaxy_build_client: false
 galaxy_client_make_target: client-production
 
 galaxy_repo: https://github.com/CESNET/galaxy.git
-galaxy_commit_id: cesnet_invenio
+galaxy_commit_id: cesnet_invenio_26.0
 
 csnt_brand: "QA1-TEST-{{ galaxy_commit_id }}"
 csnt_log_level: DEBUG
diff --git a/templates/galaxy-qa1.galaxy.cloud.e-infra.cz/config/local_tool_conf.xml.j2 b/templates/galaxy-qa1.galaxy.cloud.e-infra.cz/config/local_tool_conf.xml.j2
index 7f54acdd..d248419c 100644
--- a/templates/galaxy-qa1.galaxy.cloud.e-infra.cz/config/local_tool_conf.xml.j2
+++ b/templates/galaxy-qa1.galaxy.cloud.e-infra.cz/config/local_tool_conf.xml.j2
@@ -3,4 +3,5 @@
     <tool file="testing.xml" />
     <tool file="testing_pbs.xml" />
     <tool file="testing_html.xml" />
+    <tool file="alphafind.xml" />
 </toolbox>
diff --git a/templates/galaxy/config/tpv_rules_meta.yml.j2 b/templates/galaxy/config/tpv_rules_meta.yml.j2
index 326a40ab..565bf241 100644
--- a/templates/galaxy/config/tpv_rules_meta.yml.j2
+++ b/templates/galaxy/config/tpv_rules_meta.yml.j2
@@ -253,11 +253,13 @@ tools:
     scheduling:
       require:
         - alphafold
+  alphafind_search.*:  # tool ids are regexes, not globs
+    inherits: local_running_tools
   toolshed.g2.bx.psu.edu/repos/bgruening/llm_hub/llm_hub/.*:
     inherits: local_running_tools
     env:
       LITELLM_CONFIG_FILE: "{{ galaxy_config_dir }}/llm_hub_config.yaml"
-     
+
 
 roles:
   training.*: