diff --git a/src/main.rs b/src/main.rs index dcd2aca..9ea6a89 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,242 +1,313 @@ use clap::Parser; use serde::Deserialize; +use std::collections::{HashMap, HashSet}; use std::fs; +use std::io::{self, Write}; use std::path::{Path, PathBuf}; use tracing::{Level, error, info, instrument, warn}; use tracing_subscriber; -// Represents the command-line arguments. #[derive(Parser, Debug)] #[command(name = "gitea-mirror")] -#[command( - about = "Ensures Git repositories are mirrored to Gitea, generated with Gemini 2.5 Web Canvas" -)] -#[clap(author, version, about, long_about = None)] +#[command(about = "Syncs Git repositories to Gitea based on a TOML config.")] struct Args { /// Path to the TOML configuration file. #[clap(short, long, value_parser, env = "GITEA_MIRROR_CONFIG_FILEPATH")] config: PathBuf, - /// Perform a dry run without creating any migrations. + /// Calculate the plan but do not execute API calls. #[clap(short, long, default_value_t = false)] dry_run: bool, + + /// Skip the interactive confirmation prompt. + #[clap(long, default_value_t = false)] + no_confirm: bool, } -// Represents a single repository entry in the config file. #[derive(Deserialize, Debug, Clone)] struct RepoConfig { url: String, rename: Option, } -// Represents a single organization entry in the config file. #[derive(Deserialize, Debug, Clone)] struct OrgConfig { url: String, api_key: Option, } -// Represents the main structure of the TOML configuration file. #[derive(Deserialize, Debug)] struct Config { gitea_url: String, api_key: String, repos: Option>, organizations: Option>, - repo_owner: Option, // Optional owner username/org for all migrated repos + repo_owner: Option, } -// Represents the payload for creating a migration in Gitea. #[derive(serde::Serialize, Debug)] struct MigrateRepoPayload<'a> { clone_addr: &'a str, repo_name: &'a str, - repo_owner: &'a str, // Username or organization name + repo_owner: &'a str, mirror: bool, private: bool, description: &'a str, } -// Represents a user as returned by the Gitea API. #[derive(Deserialize, Debug)] struct GiteaUser { login: String, } -/// Entry point of the application. #[tokio::main] async fn main() -> Result<(), Box> { - // Initialize the tracing subscriber for logging. tracing_subscriber::fmt().with_max_level(Level::INFO).init(); - - // Parse command-line arguments or get config path from environment variable. let args = Args::parse(); - - info!("Starting Gitea mirror process. Dry run: {}", args.dry_run); - - // Read and parse the configuration file. let config = load_config(&args.config)?; let http_client = reqwest::Client::new(); - // Determine the owner (either from repo_owner or authenticated user) + // 1. Determine Target Owner let owner_name = if let Some(owner) = &config.repo_owner { - info!("Using specified repo_owner: {}", owner); owner.clone() } else { - info!("No repo_owner specified, fetching authenticated user"); get_authenticated_username(&http_client, &config.gitea_url, &config.api_key).await? }; + info!("Target Owner: {}", owner_name); - info!("Using owner '{}' for all migrated repositories", owner_name); + // 2. Build 'Desired' State (Map) + info!("Resolving desired state from configuration..."); + let mut desired_repos: HashMap = HashMap::new(); - // Process repositories from the static list. + // 2a. Static Repos if let Some(repos) = &config.repos { - for repo_config in repos { - process_repo( - &repo_config.url, - repo_config.rename.as_deref(), - &owner_name, - &http_client, - &config, - args.dry_run, - ) - .await?; + for r in repos { + let name = r + .rename + .as_deref() + .or_else(|| extract_repo_name(&r.url)) + .ok_or_else(|| format!("Invalid URL: {}", r.url))?; + desired_repos.insert(name.to_string(), r.url.clone()); } } - // Process repositories from the organizations/users list. - if let Some(org_configs) = &config.organizations { - for org_config in org_configs { - info!( - "Fetching repositories from organization: {}", - org_config.url - ); - match fetch_org_repos(&http_client, &org_config.url, org_config.api_key.as_deref()) - .await - { - Ok(repo_urls) => { - info!( - "Found {} repositories for {}", - repo_urls.len(), - org_config.url - ); - for url in repo_urls { - process_repo( - &url, - None, // No rename support for orgs - &owner_name, - &http_client, - &config, - args.dry_run, - ) - .await?; - } + // 2b. Organization Repos + if let Some(orgs) = &config.organizations { + for org in orgs { + info!("Fetching repos from source: {}", org.url); + let urls = + fetch_external_org_repos(&http_client, &org.url, org.api_key.as_deref()).await?; + for url in urls { + if let Some(name) = extract_repo_name(&url) { + desired_repos.insert(name.to_string(), url); } - Err(e) => error!("Failed to fetch repos from {}: {}", org_config.url, e), } } } - info!("Gitea mirror process completed."); + // 3. Build 'Current' State (Set) + info!("Fetching existing repositories from Gitea ({})", owner_name); + let existing_repos = fetch_all_target_repos(&http_client, &config, &owner_name).await?; + let existing_set: HashSet = existing_repos.into_iter().collect(); + + // 4. Calculate Diff + let mut to_add: Vec<(String, String)> = desired_repos + .iter() + .filter(|(name, _)| !existing_set.contains(*name)) + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + // Sort for consistent output + to_add.sort_by(|a, b| a.0.cmp(&b.0)); + + let mut to_delete: Vec = existing_set + .iter() + .filter(|name| !desired_repos.contains_key(*name)) + .cloned() + .collect(); + + to_delete.sort(); + + let mut to_keep: Vec = desired_repos + .keys() + .filter(|name| existing_set.contains(*name)) + .cloned() + .collect(); + + to_keep.sort(); + + // 5. Present Plan + println!("\n--- Execution Plan ---"); + for name in &to_keep { + println!(" [=] KEEP: {}", name); + } + for (name, url) in &to_add { + println!(" [+] ADD: {} (Source: {})", name, url); + } + for name in &to_delete { + println!(" [-] DELETE: {}", name); + } + println!("----------------------"); + println!( + "Summary: {} to add, {} to delete, {} unchanged.", + to_add.len(), + to_delete.len(), + to_keep.len() + ); + + if to_add.is_empty() && to_delete.is_empty() { + info!("Sync complete. No changes detected."); + return Ok(()); + } + + // 6. Confirmation / Dry Run + if args.dry_run { + info!("Dry run enabled. Exiting without changes."); + return Ok(()); + } + + if !args.no_confirm { + print!("\nProceed with these changes? [y/N]: "); + io::stdout().flush()?; + let mut input = String::new(); + io::stdin().read_line(&mut input)?; + if !input.trim().eq_ignore_ascii_case("y") { + info!("Aborted by user."); + return Ok(()); + } + } + + // 7. Execute + // Additions + for (name, url) in to_add { + info!("Migrating {}...", name); + let payload = MigrateRepoPayload { + clone_addr: &url, + repo_name: &name, + repo_owner: &owner_name, + mirror: true, + private: false, + description: "Mirrored via gitea-mirror", + }; + + match create_migration(&http_client, &config, &payload).await { + Ok(_) => info!("Successfully migrated {}", name), + Err(e) => error!("Failed to migrate {}: {}", name, e), + } + } + + // Deletions + for name in to_delete { + info!("Deleting {}...", name); + match delete_repo(&http_client, &config, &owner_name, &name).await { + Ok(_) => info!("Successfully deleted {}", name), + Err(e) => error!("Failed to delete {}: {}", name, e), + } + } + + info!("Process completed."); Ok(()) } -/// Loads and parses the TOML configuration file. +// --- Helpers --- + #[instrument(skip(path))] fn load_config(path: &Path) -> Result> { - info!("Loading configuration from: {:?}", path); let content = fs::read_to_string(path)?; let config: Config = toml::from_str(&content)?; Ok(config) } -/// Fetches the authenticated user's login name from Gitea. -#[instrument(skip(http_client, gitea_url, api_key))] +fn extract_repo_name(url: &str) -> Option<&str> { + url.split('/').last().map(|s| s.trim_end_matches(".git")) +} + +// --- API Calls --- + async fn get_authenticated_username( - http_client: &reqwest::Client, - gitea_url: &str, + client: &reqwest::Client, + base_url: &str, api_key: &str, ) -> Result { - let url = format!("{}/api/v1/user", gitea_url); - let user: GiteaUser = http_client + let url = format!("{}/api/v1/user", base_url); + let user: GiteaUser = client .get(&url) - .header("Authorization", format!("token {}", api_key)) + .bearer_auth(api_key) .send() .await? .error_for_status()? .json() .await?; - info!("Authenticated as user: {}", user.login); Ok(user.login) } -/// Checks if a repository already exists in Gitea for the user. -#[instrument(skip(http_client, gitea_url, api_key))] -async fn repo_exists( - http_client: &reqwest::Client, - gitea_url: &str, - api_key: &str, - repo_name: &str, -) -> Result { - let url = format!("{}/api/v1/repos/search", gitea_url); - let response: serde_json::Value = http_client - .get(&url) - .query(&[("q", repo_name), ("limit", "1")]) - .header("Authorization", format!("token {}", api_key)) - .send() - .await? - .error_for_status()? - .json() - .await?; +/// Fetches ALL repos for the target owner on the Gitea instance. +/// Handles pagination to ensure we have the complete state for syncing. +async fn fetch_all_target_repos( + client: &reqwest::Client, + config: &Config, + owner: &str, +) -> Result, Box> { + let mut names = Vec::new(); + let mut page = 1; + // Try organization endpoint first, fall back to user? + // Gitea distinguishes /orgs/{org}/repos and /users/{user}/repos. + // To be safe, we search via search API restricted to owner, or try both. + // Simplest compliant way: /repos/search?uid={owner_id} or q=&owner={owner} + + // Let's use the specific search endpoint which is robust. + let base_url = format!("{}/api/v1/repos/search", config.gitea_url); + + loop { + let params = [ + ("owner", owner), + ("limit", "50"), + ("page", &page.to_string()), + ]; + + let res = client + .get(&base_url) + .bearer_auth(&config.api_key) + .query(¶ms) + .send() + .await? + .error_for_status()?; + + let json: serde_json::Value = res.json().await?; + let data = json + .get("data") + .and_then(|d| d.as_array()) + .ok_or("Invalid API response")?; + + if data.is_empty() { + break; + } - if let Some(data) = response.get("data").and_then(|d| d.as_array()) { for repo in data { if let Some(name) = repo.get("name").and_then(|n| n.as_str()) { - if name.eq_ignore_ascii_case(repo_name) { - return Ok(true); - } + names.push(name.to_string()); } } + page += 1; } - - Ok(false) + Ok(names) } -/// Creates a mirror migration in Gitea. -#[instrument(skip(http_client, config, payload))] -async fn create_migration( - http_client: &reqwest::Client, - config: &Config, - payload: &MigrateRepoPayload<'_>, -) -> Result<(), reqwest::Error> { - let url = format!("{}/api/v1/repos/migrate", config.gitea_url); - http_client - .post(&url) - .header("Authorization", format!("token {}", config.api_key)) - .json(payload) - .send() - .await? - .error_for_status()?; - Ok(()) -} - -/// Fetches all repository clone URLs from a given Gitea/GitHub organization/user page. -#[instrument(skip(http_client, api_key))] -async fn fetch_org_repos( - http_client: &reqwest::Client, +/// Fetches clone URLs from external source (GitHub/Gitea). +async fn fetch_external_org_repos( + client: &reqwest::Client, org_url: &str, api_key: Option<&str>, ) -> Result, Box> { - // This is a simplified fetcher. It assumes Gitea API compatibility. - // For GitHub, you might need a different base URL and auth method. let api_url = if org_url.contains("github.com") { let parts: Vec<&str> = org_url.trim_end_matches('/').split('/').collect(); let user_or_org = parts.last().ok_or("Invalid GitHub URL")?; format!("https://api.github.com/users/{}/repos", user_or_org) } else { - // Assuming Gitea-like URL structure + // Assuming Gitea let parts: Vec<&str> = org_url.trim_end_matches('/').split('/').collect(); let user_or_org = parts.last().ok_or("Invalid Gitea URL")?; + // Heuristic to find API endpoint from web URL format!( "{}s/{}/repos", org_url.replace(user_or_org, &format!("api/v1/user")), @@ -244,35 +315,29 @@ async fn fetch_org_repos( ) }; - info!("Querying API endpoint: {}", api_url); - - let mut repos: Vec = Vec::new(); + let mut repos = Vec::new(); let mut page = 1; + loop { - let mut request_builder = http_client + let mut req = client .get(&api_url) .query(&[("page", page.to_string())]) - // For GitHub, a User-Agent is required. - .header("User-Agent", "gitea-mirror-rust-client"); + .header("User-Agent", "gitea-mirror-rust"); if let Some(key) = api_key { - request_builder = request_builder.header("Authorization", format!("token {}", key)); + req = req.bearer_auth(key); } - let response: Vec = request_builder - .send() - .await? - .error_for_status()? - .json() - .await?; + let res = req.send().await?.error_for_status()?; + let json: Vec = res.json().await?; - if response.is_empty() { - break; // No more pages + if json.is_empty() { + break; } - for repo in response { - if let Some(clone_url) = repo.get("clone_url").and_then(|u| u.as_str()) { - repos.push(clone_url.to_string()); + for repo in json { + if let Some(url) = repo.get("clone_url").and_then(|u| u.as_str()) { + repos.push(url.to_string()); } } page += 1; @@ -281,53 +346,34 @@ async fn fetch_org_repos( Ok(repos) } -/// Core logic to process a single repository. -#[instrument(skip(owner_name, http_client, config, dry_run))] -async fn process_repo( - repo_url: &str, - rename: Option<&str>, - owner_name: &str, - http_client: &reqwest::Client, +async fn create_migration( + client: &reqwest::Client, config: &Config, - dry_run: bool, -) -> Result<(), Box> { - let repo_name = match rename { - Some(name) => name, - None => extract_repo_name(repo_url).ok_or("Could not extract repo name from URL")?, - }; - - info!("Processing repo '{}' -> '{}'", repo_url, repo_name); - - if repo_exists(http_client, &config.gitea_url, &config.api_key, repo_name).await? { - info!("Repo '{}' already exists. Skipping.", repo_name); - } else { - warn!("Repo '{}' does not exist. Migration needed.", repo_name); - if !dry_run { - info!("Initiating migration for '{}'...", repo_name); - let payload = MigrateRepoPayload { - clone_addr: repo_url, - repo_name, - repo_owner: owner_name, - mirror: true, - private: false, // Defaulting to public, change if needed - description: "", - }; - if let Err(e) = create_migration(http_client, config, &payload).await { - error!("Failed to create migration for '{}': {}", repo_name, e); - } else { - info!("Successfully started migration for '{}'.", repo_name); - } - } else { - info!( - "Dry run enabled. Skipping actual migration for '{}'.", - repo_name - ); - } - } + payload: &MigrateRepoPayload<'_>, +) -> Result<(), reqwest::Error> { + let url = format!("{}/api/v1/repos/migrate", config.gitea_url); + client + .post(&url) + .bearer_auth(&config.api_key) + .json(payload) + .send() + .await? + .error_for_status()?; Ok(()) } -/// Extracts a repository name from a git URL (e.g., "https://.../repo.git" -> "repo"). -fn extract_repo_name(url: &str) -> Option<&str> { - url.split('/').last().map(|s| s.trim_end_matches(".git")) +async fn delete_repo( + client: &reqwest::Client, + config: &Config, + owner: &str, + repo_name: &str, +) -> Result<(), reqwest::Error> { + let url = format!("{}/api/v1/repos/{}/{}", config.gitea_url, owner, repo_name); + client + .delete(&url) + .bearer_auth(&config.api_key) + .send() + .await? + .error_for_status()?; + Ok(()) }