Source code for haive.mcp.registry.server_converter

#!/usr/bin/env python3
"""Server Registry Converter for Phase 3+.

This module converts GitHub-based server entries from the 1900+ server database
into npm package format for the MCP Manager registry.

Features:
- Converts GitHub URLs to potential npm package names
- Validates npm package existence
- Generates category mappings
- Creates installable server lists

Usage:
    from haive.mcp.registry.server_converter import ServerConverter
    
    converter = ServerConverter()
    
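    # Derive candidate npm package names for a repository (no network access)
    candidates = converter.convert_github_to_npm_candidates(
        "https://github.com/modelcontextprotocol/server-time"
    )

    # Convert database entries; candidates are validated against the npm registry
    conversions = await converter.convert_batch([
        {"repository_url": "https://github.com/modelcontextprotocol/server-time"},
        {"repository_url": "https://github.com/some-org/mcp-server-custom"},
    ])

    # Group the validated packages by guessed category
    categories = converter.create_registry_categories(conversions)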
"""

import asyncio
import json
import logging
import re
from typing import Dict, List, Optional, Set, Tuple
from pathlib import Path
from dataclasses import dataclass

import aiohttp
from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)


@dataclass
class ServerConversion:
    """Outcome of mapping one GitHub-hosted server onto npm package names.

    Attributes:
        github_url: Repository URL the conversion started from.
        potential_npm_packages: Candidate npm package names derived from
            the URL.
        validated_package: The candidate selected as the validated package,
            or ``None`` when no candidate was validated.
        category_guess: Heuristically guessed registry category, if any.
        confidence: Score attached to ``validated_package``; stays at
            ``0.0`` when nothing was validated.
    """

    # Required inputs of a conversion.
    github_url: str
    potential_npm_packages: List[str]

    # Results; the defaults describe "nothing validated yet".
    validated_package: Optional[str] = None
    category_guess: Optional[str] = None
    confidence: float = 0.0
class NPMPackageValidator:
    """Check whether package names exist on the public npm registry.

    Use the validator as an async context manager so the HTTP session is
    opened and closed deterministically::

        async with NPMPackageValidator() as validator:
            found = await validator.validate_batch(["mcp-server-time"])

    The context manager is re-entrant: several coroutines may hold
    ``async with validator`` at the same time and share one session, which
    is closed only when the last of them exits.

    Definitive lookups (HTTP 200 and 404) are cached per instance. Failed
    or inconclusive lookups are reported as ``False`` but never cached, so a
    transient error does not permanently mark a package as missing.
    """

    def __init__(self):
        self.session: Optional[aiohttp.ClientSession] = None
        self._cache: Dict[str, bool] = {}
        # Number of ``async with`` blocks currently sharing ``self.session``.
        self._active_contexts = 0

    async def __aenter__(self):
        # Reuse the open session instead of replacing it: replacing would
        # leak the previous session and break coroutines still using it.
        if self.session is None:
            self.session = aiohttp.ClientSession()
        self._active_contexts += 1
        return self

    async def __aexit__(self, *args):
        self._active_contexts = max(self._active_contexts - 1, 0)
        if self._active_contexts or self.session is None:
            return
        # Detach before awaiting so a concurrent ``__aenter__`` opens a fresh
        # session rather than picking up one that is being closed.
        session, self.session = self.session, None
        await session.close()

    async def package_exists(self, package_name: str) -> bool:
        """Check if an npm package exists.

        Args:
            package_name: npm package name, optionally scoped
                (``@scope/name``).

        Returns:
            ``True`` if the registry answered 200 for the package. ``False``
            if it answered 404, if the lookup failed, if the status was
            inconclusive, or if no session is open. Only 200/404 answers are
            cached.
        """
        cached = self._cache.get(package_name)
        if cached is not None:
            return cached

        if self.session is None:
            logger.warning(
                "Error checking package %s: no open HTTP session "
                "(use 'async with' on the validator)",
                package_name,
            )
            return False

        url = f"https://registry.npmjs.org/{package_name}"
        try:
            async with self.session.get(url) as response:
                status = response.status
        except Exception as e:
            # Best-effort boundary: report "not found" for this call, but do
            # not cache it because the failure may be transient.
            logger.warning("Error checking package %s: %s", package_name, e)
            return False

        if status == 200:
            self._cache[package_name] = True
            return True
        if status == 404:
            self._cache[package_name] = False
            return False

        # Rate limiting or a server error says nothing about existence.
        logger.warning(
            "Error checking package %s: unexpected HTTP status %s",
            package_name,
            status,
        )
        return False

    async def validate_batch(self, package_names: List[str]) -> Dict[str, bool]:
        """Validate multiple packages in parallel.

        Args:
            package_names: Package names to look up; duplicates are looked
                up only once.

        Returns:
            Mapping of each distinct package name to whether it exists. A
            lookup that raised is reported as ``False``.
        """
        unique_names = list(dict.fromkeys(package_names))
        results = await asyncio.gather(
            *(self.package_exists(name) for name in unique_names),
            return_exceptions=True,
        )
        return {
            name: False if isinstance(result, BaseException) else result
            for name, result in zip(unique_names, results)
        }
class ServerConverter:
    """Converts server entries from GitHub format to npm package format.

    Attributes:
        validator: ``NPMPackageValidator`` used to check candidates against
            the npm registry.
        conversion_patterns: Ordered ``(regex, replacement)`` pairs applied
            to the normalized GitHub URL; earlier pairs have higher priority.
        category_mapping: Keyword -> category table used by
            ``guess_category``; the first matching keyword (in insertion
            order) wins.
    """

    # npm scope of the official Model Context Protocol packages.
    _OFFICIAL_SCOPE = "@modelcontextprotocol/"

    # Name groups use ``[\w-]+`` so hyphenated names such as
    # ``server-brave-search`` are captured whole. The trailing lookahead pins
    # each pattern to the end of the repository path segment, so
    # ``foo-mcp-server`` is not also reported as ``foo-mcp``.
    _DEFAULT_CONVERSION_PATTERNS = (
        # Official modelcontextprotocol packages
        (
            r"github\.com/modelcontextprotocol/server-([\w-]+)(?=[/?#]|$)",
            r"@modelcontextprotocol/server-\1",
        ),
        # Community packages with standard naming
        (r"github\.com/[\w-]+/mcp-server-([\w-]+)(?=[/?#]|$)", r"mcp-server-\1"),
        (r"github\.com/[\w-]+/([\w-]+)-mcp-server(?=[/?#]|$)", r"\1-mcp-server"),
        (r"github\.com/[\w-]+/([\w-]+)-mcp(?=[/?#]|$)", r"\1-mcp"),
        # Custom org packages
        (r"github\.com/([\w-]+)/mcp-([\w-]+)(?=[/?#]|$)", r"@\1/mcp-\2"),
    )

    _DEFAULT_CATEGORY_MAPPING = {
        # Development
        "filesystem": "development",
        "git": "development",
        "github": "development",
        "gitlab": "development",
        "docker": "development",
        "kubernetes": "development",
        "aws": "development",
        # Data
        "postgres": "data",
        "mysql": "data",
        "sqlite": "data",
        "mongodb": "data",
        "s3": "data",
        "gdrive": "data",
        "memory": "data",
        # Productivity
        "search": "productivity",
        "brave": "productivity",
        "google": "productivity",
        "slack": "productivity",
        "notion": "productivity",
        "calendar": "productivity",
        "time": "productivity",
        # AI
        "thinking": "ai",
        "sequential": "ai",
        "image": "ai",
        "speech": "ai",
        "text": "ai",
        # Web APIs
        "twitter": "web_apis",
        "reddit": "web_apis",
        "youtube": "web_apis",
        "stripe": "web_apis",
        "analytics": "web_apis",
        # Security
        "nmap": "security",
        "whois": "security",
        "ssl": "security",
        "vault": "security",
        "1password": "security",
    }

    def __init__(self):
        self.validator = NPMPackageValidator()

        # Per-instance copies so callers can extend them without touching
        # the class-level defaults.
        self.conversion_patterns = list(self._DEFAULT_CONVERSION_PATTERNS)
        self.category_mapping = dict(self._DEFAULT_CATEGORY_MAPPING)

    @staticmethod
    def _normalize_github_url(github_url: str) -> str:
        """Lower-case the URL and drop a trailing slash and ``.git`` suffix."""
        url = github_url.strip().lower().rstrip("/")
        if url.endswith(".git"):
            url = url[: -len(".git")]
        return url

    def convert_github_to_npm_candidates(self, github_url: str) -> List[str]:
        """Convert a GitHub URL to potential npm package names.

        Candidates are bare package names (never URLs), lower-cased, free of
        duplicates and ordered by priority: names produced by
        ``conversion_patterns`` first, then generic guesses built from the
        repository name.

        Args:
            github_url: Repository URL such as
                ``https://github.com/owner/repo``.

        Returns:
            Candidate npm package names; empty if the URL does not look like
            a GitHub repository URL.
        """
        url = self._normalize_github_url(github_url)
        candidates = []

        for pattern, replacement in self.conversion_patterns:
            match = re.search(pattern, url)
            if match:
                # Expand only the match. ``re.sub`` on the whole URL would
                # leave the scheme and host glued to the package name.
                candidates.append(match.expand(replacement))

        repo_match = re.search(r"github\.com/[\w-]+/([\w-]+)", url)
        if repo_match:
            repo_name = repo_match.group(1)
            candidates.extend([
                f"{self._OFFICIAL_SCOPE}{repo_name}",
                f"mcp-{repo_name}",
                f"{repo_name}-mcp",
                repo_name,
            ])

        # dict.fromkeys removes duplicates while keeping priority order; a
        # set would make the order (and the selected package) random.
        return list(dict.fromkeys(candidates))

    def guess_category(self, server_name: str, description: str = "") -> Optional[str]:
        """Guess the category based on server name and description.

        When ``server_name`` is a GitHub URL only the repository name is
        inspected, so the host and owner (``github.com``,
        ``modelcontextprotocol``) cannot trigger keywords. A keyword matches
        only at the start of a word: ``git`` matches ``github-server`` but
        ``text`` does not match ``context``.

        Args:
            server_name: Server name or GitHub repository URL.
            description: Optional free-text description.

        Returns:
            Category of the first matching keyword in ``category_mapping``,
            or ``"utility"`` when no keyword matches.
        """
        repo_match = re.search(
            r"github\.com/[\w.-]+/([\w.-]+)", server_name, re.IGNORECASE
        )
        name = repo_match.group(1) if repo_match else server_name
        text = f"{name} {description}".lower()

        for keyword, category in self.category_mapping.items():
            if re.search(rf"(?<![a-z0-9]){re.escape(keyword)}", text):
                return category

        return "utility"  # Default category

    async def convert_server_entry(self, github_url: str, description: str = "") -> "ServerConversion":
        """Convert a single server entry.

        Candidates are checked against npm. A validated candidate in the
        official ``@modelcontextprotocol/`` scope is selected with confidence
        0.9; otherwise the highest-priority validated candidate is selected
        with confidence 0.7. With no validated candidate the confidence is
        0.0.

        Args:
            github_url: Repository URL of the server.
            description: Optional description used for category guessing.

        Returns:
            A ``ServerConversion`` describing candidates, selection,
            category and confidence.
        """
        candidates = self.convert_github_to_npm_candidates(github_url)

        validation_results: Dict[str, bool] = {}
        if candidates:  # nothing to look up -> skip the network entirely
            async with self.validator:
                validation_results = await self.validator.validate_batch(candidates)

        confirmed = [name for name in candidates if validation_results.get(name)]
        official = next(
            (name for name in confirmed if name.startswith(self._OFFICIAL_SCOPE)),
            None,
        )
        if official is not None:
            validated_package, confidence = official, 0.9
        elif confirmed:
            validated_package, confidence = confirmed[0], 0.7
        else:
            validated_package, confidence = None, 0.0

        return ServerConversion(
            github_url=github_url,
            potential_npm_packages=candidates,
            validated_package=validated_package,
            category_guess=self.guess_category(github_url, description),
            confidence=confidence,
        )

    async def convert_batch(self, server_entries: List[Dict]) -> List["ServerConversion"]:
        """Convert multiple server entries in parallel.

        Args:
            server_entries: Dicts with an optional ``"repository_url"`` and
                ``"description"``; missing or ``None`` values are treated as
                empty strings.

        Returns:
            One result per entry, in input order. An entry whose conversion
            raised is represented by the exception object instead of a
            ``ServerConversion``.
        """
        tasks = [
            self.convert_server_entry(
                entry.get("repository_url") or "",
                entry.get("description") or "",
            )
            for entry in server_entries
        ]
        return await asyncio.gather(*tasks, return_exceptions=True)

    def create_registry_categories(self, conversions: List["ServerConversion"]) -> Dict[str, List[str]]:
        """Create registry categories from validated conversions.

        Args:
            conversions: Results from ``convert_server_entry`` or
                ``convert_batch``. Exception objects (as returned by
                ``convert_batch`` for failed entries) are skipped.

        Returns:
            Mapping of category name to validated package names. Only
            conversions with a validated package and confidence >= 0.5 are
            included; a missing category falls back to ``"utility"``.
        """
        categories: Dict[str, List[str]] = {}

        for conversion in conversions:
            if isinstance(conversion, BaseException):
                continue
            if not conversion.validated_package or conversion.confidence < 0.5:
                continue
            category = conversion.category_guess or "utility"
            categories.setdefault(category, []).append(conversion.validated_package)

        return categories
async def main():
    """Demonstrate ServerConverter on a handful of sample GitHub URLs.

    Each URL is converted in turn and the candidates, validated package,
    guessed category and confidence are printed.
    """
    sample_urls = (
        "https://github.com/modelcontextprotocol/server-filesystem",
        "https://github.com/modelcontextprotocol/server-github",
        "https://github.com/someone/mcp-server-custom",
        "https://github.com/example/custom-mcp-server",
    )
    converter = ServerConverter()

    print("🔄 Converting GitHub URLs to npm packages...")

    for github_url in sample_urls:
        conversion = await converter.convert_server_entry(github_url)
        print(f"\n📦 {github_url}")
        print(f" Candidates: {conversion.potential_npm_packages}")
        print(f" Validated: {conversion.validated_package}")
        print(f" Category: {conversion.category_guess}")
        print(f" Confidence: {conversion.confidence:.1f}")


# Run the demo only when executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())