1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
/// Arguments used across all worker operations
mod arguments;

/// Module used to download raw files from the internet and mirror them into
/// a working directory.
mod download;

/// Fetch git sources into a workspace
mod fetch;

/// Representation of `.lorry` specification files
pub mod lorry_specs;

/// Responsible for importing files into the file system for LFS backed mirrors
mod importer;

/// Wrapper for running low level git commands and initializing the underlying
/// mirror cache directory on a per-mirror basis
pub mod workspace;

/// Configure the global settings of all git-binary based operations.
pub mod git_config;

/// Execute internal commands
pub mod execute;

/// Helper wrapper for mirroring arbitrary raw files into an LFS backed remote
/// downstream repository.
mod raw_files;

/// Helper command builder for pushing mirrors downstream
mod push;

/// Internal server for various integration tests
#[cfg(test)]
mod test_server;

use std::collections::BTreeMap;

use crate::execute::{execute, Error as ExecutionError};
pub use arguments::Arguments;
pub use lorry_specs::extract_lorry_specs;
pub use lorry_specs::LorrySpec;

use git2::{Error as Libgit2Error, Repository};
use glob::Pattern;
use lorry_specs::SingleLorry;
use thiserror::Error;
use url::Url;

/// Extra header exposed on Git fetch operations to upstream servers and
/// push operations on downstream.
///
/// The value is built at compile time as `Lorry-Version: <crate version>`.
pub const LORRY_VERSION_HEADER: &str = concat!("Lorry-Version: ", env!("CARGO_PKG_VERSION"));

/// Short name of the default branch. `try_mirror` reports the faked push
/// result of raw-file mirrors against this branch name.
pub const DEFAULT_BRANCH_NAME: &str = "main";
/// Fully qualified ref name corresponding to [`DEFAULT_BRANCH_NAME`].
pub const DEFAULT_REF_NAME: &str = "refs/heads/main";

/// A problem noticed while running a Lorry that does not abort the mirror.
#[derive(Clone, Debug)]
pub enum Warning {
    /// A refspec pattern was configured but not a single ref in the
    /// upstream repository matched it.
    NonMatchingRefSpecs {
        /// The configured pattern that failed to match any refs
        pattern: String,
    },
}

impl Warning {
    /// Short, human readable identifier for this kind of warning
    pub fn name(&self) -> String {
        let label = match self {
            Self::NonMatchingRefSpecs { .. } => "NonMatchingRefSpec",
        };
        label.to_owned()
    }

    /// Detail text of the warning; for a non-matching refspec this is the
    /// offending pattern itself.
    pub fn message(&self) -> String {
        match self {
            Self::NonMatchingRefSpecs { pattern } => pattern.to_owned(),
        }
    }
}

/// Result of parsing the per-ref status lines of porcelain `git push` output.
///
/// Example command:
/// `git push --porcelain origin 'refs/heads/*' 2>/dev/null`
///
/// * Field `0` holds the refs that were pushed successfully.
/// * Field `1` holds the refs that failed; a non-empty list is turned into
///   an error by `get_refs`.
///
/// Each entry is a pair of strings. Presumably `(ref name, status message)`,
/// judging by how `try_mirror` builds its faked result — confirm against
/// `push::Push::parse_output`.
#[derive(Clone, Debug, Default)]
#[allow(dead_code)]
pub struct PushRefs(pub Vec<(String, String)>, pub Vec<(String, String)>);

/// Contains the status of a single mirror operation that ran to completion.
#[derive(Default)]
pub struct MirrorStatus {
    /// Per-ref outcome of the push to the downstream repository
    pub push_refs: PushRefs,
    /// Non-fatal problems collected while mirroring, e.g. configured ref
    /// patterns that did not match any ref
    pub warnings: Vec<Warning>,
}

/// Three-state policy selecting when an operation should be carried out.
///
/// The value can be parsed from the command line (`clap::ValueEnum`) and
/// deserialized from configuration using the lowercase names `first`,
/// `never` and `always`.
///
/// NOTE(review): the operation this policy governs is not visible in this
/// file; the variant descriptions below are inferred from the names only —
/// confirm against the call sites.
#[derive(
    Copy, Clone, PartialEq, Eq, PartialOrd, Ord, clap::ValueEnum, Debug, serde::Deserialize,
)]
pub enum WhenToOperate {
    /// Presumably: operate only the first time
    #[serde(rename = "first")]
    First,

    /// Presumably: never operate
    #[serde(rename = "never")]
    Never,

    /// Presumably: operate on every run
    #[serde(rename = "always")]
    Always,
}

/// Top-level error type containing all possible errors during a mirroring operation.
///
/// Variants carrying `#[from]` are created automatically by the `?` operator
/// from the corresponding lower-level error type.
#[derive(Error, Debug)]
pub enum Error {
    /// The upstream was a git repo. There was an error pulling from the remote.
    #[error("Failed to fetch upstream: {0}")]
    Fetch(#[from] fetch::Error),

    /// Indicates that the job exceeded the time it was allocated in the
    /// lorry configuration. `seconds` is the value printed in the message.
    #[error("job exceeded the allocated timeout: {seconds}")]
    TimeoutExceeded { seconds: u64 },

    /// An error occurred that originated in the workspace module.
    #[error("Workspace Error: {0}")]
    WorkspaceError(#[from] workspace::Error),

    /// An error occurred when importing raw files into a working directory.
    #[error("LFS Importer Error: {0}")]
    LFSImporter(#[from] importer::Error),

    /// Generic command execution error not specific to any particular module.
    #[error("Command Execution Error: {0}")]
    Command(#[from] ExecutionError),

    /// Generic IO error raised when a command could not be spawned.
    /// `command` is the command that failed; note that it is not part of
    /// the rendered message.
    #[error("IO Error")]
    Io { command: String },
    /// Tried to access a repository on the local filesystem as the downstream,
    /// but the given path is not a valid one. The payload is the offending URL.
    #[error(
        "Downstream is specified as a path on the local filesystem, but the path is malformed."
    )]
    InvalidPath(String),

    /// Problem parsing a downstream git url
    #[error("Failed to construct path to downstream git repo")]
    ParseError(#[from] url::ParseError),

    /// Generic Libgit2 failure not specific to any particular module.
    #[error("Internal git failure")]
    Libgit2Error(#[from] Libgit2Error),

    /// Indicates that 100% of the desired refs failed to push to the
    /// downstream. `refs` is the parsed push result and `n_attempted` the
    /// number of refs that were pushed.
    #[error("All ({n_attempted}) refspecs failed")]
    AllRefspecsFailed { refs: PushRefs, n_attempted: i64 },

    /// Indicates that some, but not all, of the desired refspecs could not
    /// be pushed downstream. `refs` is the parsed push result and
    /// `n_failed` the number of refs that failed.
    #[error("{n_failed} refspecs failed")]
    SomeRefspecsFailed { refs: PushRefs, n_failed: i64 },

    /// Indicates a refspec was malformed. `pattern` is the offending glob
    /// and `error` the message reported by the glob parser.
    #[error("Invalid Glob: {pattern} - {error}")]
    InvalidGlobPattern { pattern: String, error: String },

    /// An error related to raw-file management occurred.
    #[error("Raw File Related Error: {0}")]
    RawFiles(#[from] raw_files::Error),

    /// Indicates none of the refs in the mirror configuration matches those
    /// that were available in the repository.
    #[error("No Matching Refspecs")]
    NoMatchingRefspecs,

    /// Indicates that a push command failed and we could not understand
    /// the reason why. This may be due to a downstream server error.
    #[error("Cannot parse push output: {0}")]
    CannotParsePushOutput(String),

    /// Indicates that the server requires Sha256sum values for all raw-files
    /// but the file did not have one specified. The payload lists the
    /// affected entries, one per line.
    #[error("Sha256sums are missing: \n{0}")]
    Sha256sumsNotSpecified(String),

    /// Indicates the ignore pattern was not a valid regular expression
    ///
    /// NOTE(review): `parse_refs` compiles ignore patterns as globs and
    /// reports failures as `InvalidGlobPattern`; nothing visible in this
    /// file constructs this variant — confirm it is still needed.
    #[error("Invalid ignore pattern: {0}")]
    InvalidIgnorePattern(String),
}

impl Error {
    /// return the underlying status code from the error if the error is the
    /// result of an command execution failure otherwise return nothing.
    #[allow(clippy::collapsible_match)]
    pub fn status(&self) -> Option<i32> {
        match self {
            Error::Fetch(e) => match e {
                fetch::Error::Command(e) => e.status(),
                _ => None,
            },
            Error::Command(e) => match e {
                ExecutionError::IO {
                    command: _,
                    source: _,
                } => None,
                ExecutionError::CommandError {
                    command: _,
                    status,
                    stderr: _,
                    stdout: _,
                } => Some(status.code().unwrap_or(-1)),
            },
            _ => None,
        }
    }
}

fn get_refs(n_refs: usize, stdout: &str) -> Result<PushRefs, Error> {
    match crate::push::Push::parse_output(stdout) {
        Ok(results) => {
            if results.1.len() == n_refs {
                return Err(Error::AllRefspecsFailed {
                    refs: results.clone(),
                    n_attempted: n_refs as i64,
                });
            }
            if !results.1.is_empty() {
                Err(Error::SomeRefspecsFailed {
                    refs: results.clone(),
                    n_failed: results.1.len() as i64,
                })
            } else {
                Ok(results)
            }
        }
        Err(message) => {
            tracing::warn!("Failed to parse git push output:\n{}", message);
            Err(Error::CannotParsePushOutput(message))
        }
    }
}

/// Given all of the refs return a vec of those with some matches and a vec of
/// any patterns that didn't result in any matches at all.
fn parse_refs(
    refs: &[String],
    ref_patterns: Option<&[String]>,
    ignore_patterns: Option<&[String]>,
) -> Result<(Vec<String>, Vec<String>), Error> {
    let ignore_globs = ignore_patterns
        .as_ref()
        .map(|ignore_patterns| {
            ignore_patterns
                .iter()
                .try_fold(Vec::new(), |mut accm, pattern| {
                    let glob_pattern =
                        Pattern::new(pattern).map_err(|e| Error::InvalidGlobPattern {
                            pattern: pattern.clone(),
                            error: e.to_string(),
                        })?;
                    accm.push(glob_pattern);
                    Ok::<Vec<Pattern>, Error>(accm)
                })
        })
        .transpose()?;

    let refs: Vec<String> = refs
        .iter()
        .filter_map(|ref_name| {
            if ignore_globs.as_ref().is_some_and(|ignore_globs| {
                ignore_globs.iter().any(|pattern| pattern.matches(ref_name))
            }) {
                None
            } else {
                Some(ref_name.clone())
            }
        })
        .collect();

    if let Some(ref_specs) = ref_patterns {
        let mut patterns_with_matches: BTreeMap<String, bool> = BTreeMap::new();
        let globs: Vec<Pattern> = ref_specs.iter().try_fold(Vec::new(), |mut accm, pattern| {
            match Pattern::new(pattern) {
                Ok(regex) => {
                    patterns_with_matches.insert(pattern.clone(), false);
                    accm.push(regex);
                    Ok(accm)
                }
                Err(e) => Err(Error::InvalidGlobPattern {
                    pattern: pattern.clone(),
                    error: e.to_string(),
                }),
            }
        })?;

        return Ok((
            refs.iter().fold(Vec::new(), |mut accm, ref_name| {
                if let Some(matching_glob) = globs.iter().find_map(|glob_pattern| {
                    if glob_pattern.matches(ref_name)
                        || glob_pattern.matches(ref_name.trim_start_matches("refs/heads/"))
                        || glob_pattern.matches(ref_name.trim_start_matches("refs/tags/"))
                    {
                        Some(glob_pattern.as_str().to_string())
                    } else {
                        None
                    }
                }) {
                    patterns_with_matches.insert(matching_glob, true);
                    accm.push(ref_name.clone());
                };
                accm
            }),
            patterns_with_matches
                .iter()
                .filter_map(|(pattern, had_match)| {
                    if !had_match {
                        Some(pattern.clone())
                    } else {
                        None
                    }
                })
                .collect(),
        ));
    } else {
        Ok((refs.to_vec(), vec![]))
    }
}

/// Push a git based mirror to the downstream server
/// TODO: Add option to use libgit2 for push in addition to pull on git mirrors.
#[allow(deprecated)]
async fn push_to_mirror_server(
    lorry_spec: &SingleLorry,
    lorry_name: &str,
    url_to_push_to: &Url,
    active_repo: &workspace::Workspace,
    arguments: &arguments::Arguments,
) -> Result<MirrorStatus, Error> {
    if url_to_push_to.scheme() == "file" {
        let url_to_push_to = url_to_push_to
            .to_file_path()
            .map_err(|_| Error::InvalidPath(url_to_push_to.to_string()))?;
        //this repo is bieng pushed to the local filesystem
        //make sure it exists first
        if !url_to_push_to.exists() {
            // TODO: support non-gitlab
            tracing::info!("Creating local repository: {:?}", url_to_push_to);
            unimplemented!("cannot support local mirror pushing yet");
        }
    }

    tracing::debug!("Pushing {} to mirror at {:?}", lorry_name, &url_to_push_to);

    let repository = Repository::open(active_repo.repository_path())?;
    let ref_names = repository
        .references()?
        .try_fold(Vec::new(), |mut accm, reference| {
            let ref_name = match reference {
                Ok(ref_name) => {
                    let name = ref_name.name().unwrap();
                    name.to_string()
                }
                Err(err) => return Err(err),
            };
            accm.push(ref_name);
            Ok(accm)
        })?;

    let (refs, missing) = parse_refs(
        ref_names.as_slice(),
        lorry_spec.ref_patterns.as_deref(),
        lorry_spec.ignore_patterns.as_deref(),
    )?;
    tracing::info!("pushing {} refs", refs.len());

    if refs.is_empty() {
        return Err(Error::NoMatchingRefspecs);
    }

    let repository_path = active_repo.repository_path();
    let push = &crate::push::Push {
        url: url_to_push_to,
        ref_names: refs.iter().map(|s| s.as_str()).collect(),
        config_path: &arguments.git_config_path,
    };

    let push_refs = match execute(push, repository_path.as_path()).await {
        Ok((stdout, _)) => get_refs(refs.len(), &stdout),
        Err(err) => match err {
            ExecutionError::IO { command, source } => {
                tracing::warn!("Command failed to spawn: {:?}", source.to_string());
                Err(Error::Io {
                    command: command.clone(),
                })
            }
            ExecutionError::CommandError {
                command: _,
                status: _,
                stderr: _,
                stdout,
            } => get_refs(refs.len(), &stdout),
        },
    }?;

    Ok(MirrorStatus {
        push_refs,
        warnings: missing
            .iter()
            .map(|pattern| Warning::NonMatchingRefSpecs {
                pattern: pattern.clone(),
            })
            .collect(),
    })
}

/// Attempt to mirror a repository from an upstream host into the configured
/// downstream server. NOTE: The only supported downstream configuration is
/// currently Gitlab.
///
/// Rough outline of the order of operations is:
///
/// * Attempt to fetch repository data from the downstream into the working
///     directory of the local mirror.
///
/// * Fetch the upstream repository on top of the working directory of the
///     local mirror applying new updates.
///
/// * Push the local mirror back into the downstream updating it with new refs.
///
/// Fetching the downstream repository initially ensures that Lorry can work
/// in a stateless environment and will consider the downstream mirror the
/// source of truth.
///
/// This workflow applies to both LFS and normal Git mirrors. In the case of
/// LFS the raw files are also downloaded into the working directory prior to
/// importing and pushing them.
///
/// In the event that the downstream repository becomes corrupted in some way
/// the procedure is to delete and reinitialize an empty repository in which
/// case the upstream mirror will be re-imported from scratch.
///
/// # Errors
///
/// Returns [`Error::ParseError`] when the upstream URL of a git mirror is
/// malformed, [`Error::Sha256sumsNotSpecified`] when checksums are required
/// but missing, and otherwise whatever error the individual fetch, import or
/// push steps produce.
///
/// # Panics
///
/// Tarball mirrors are not implemented and panic.
///
/// TODO: This code can be factored out better
pub async fn try_mirror(
    lorry_details: &LorrySpec,
    lorry_name: &str,
    downstream_url: &url::Url,
    workspace: &workspace::Workspace,
    arguments: &Arguments,
) -> Result<MirrorStatus, Error> {
    match lorry_details {
        LorrySpec::Git(single_lorry) => {
            tracing::info!("Ensuring local mirror is consistent with downstream");
            crate::fetch::Fetch {
                git_repo: workspace,
                target_url: downstream_url,
                use_git_binary: arguments.use_git_binary,
                git_config_path: arguments.git_config_path.as_path(),
            }
            .fetch()
            .await?;

            // A malformed upstream URL is a configuration problem; report it
            // as an error instead of panicking.
            let upstream_url = url::Url::parse(single_lorry.url.as_str())?;
            tracing::info!("Fetching upstream repository into local mirror");
            crate::fetch::Fetch {
                git_repo: workspace,
                target_url: &upstream_url,
                use_git_binary: arguments.use_git_binary,
                git_config_path: arguments.git_config_path.as_path(),
            }
            .fetch()
            .await?;

            push_to_mirror_server(
                single_lorry,
                lorry_name,
                downstream_url,
                workspace,
                arguments,
            )
            .await
        }
        LorrySpec::RawFiles(raw_files) => {
            // check for missing sha256sums if configuration disallows them
            if arguments.sha256sum_required {
                let missing_sha256sums = raw_files.missing_sha256sums();
                if !missing_sha256sums.is_empty() {
                    // one offending entry per line
                    let message: String = missing_sha256sums
                        .iter()
                        .map(|url| format!("{}\n", url))
                        .collect();
                    return Err(Error::Sha256sumsNotSpecified(message));
                }
            }

            // The LFS endpoint lives below the repository URL; it is used for
            // both the initial fetch and the final push.
            let mut lfs_url = downstream_url.clone();
            lfs_url.set_path(&format!("{}/info/lfs", lfs_url.path()));

            tracing::info!("Fetching raw files from downstream to ensure consistency");
            let fetch_err = execute(
                &raw_files::FetchDownstreamRawFiles {
                    url: downstream_url,
                    lfs_url: &lfs_url,
                    worktree: workspace.lfs_data_path().as_path(),
                    config_path: arguments.git_config_path.as_path(),
                },
                &workspace.repository_path(),
            )
            .await;
            // TODO: I would prefer to explicitly detect if the repository
            // exists or not but the code calling the Gitlab API needs work.
            // BUG: Be aware that if the downstream fails for some other reason
            // like the server being down that this will create a new
            // repository and push its contents up causing a rejection error.
            // If this happens then you need to manually delete the mirror
            // directory and on the next run it will properly import the
            // downstream sources.
            if let Err(err) = fetch_err {
                tracing::warn!("Fetch failed but might not be an error: {}", err);
            } else {
                tracing::info!("Local mirror is consistent with downstream");
            }
            let importer = importer::Importer(raw_files.clone());
            let helper = raw_files::Helper(workspace.clone());
            // initialize the LFS repository if it hasn't been already creating
            // the first commit which enables tracking of everything within.
            helper
                .initial_commit_if_missing(arguments.git_config_path.as_path())
                .await?;
            // download any new raw files
            let modified = importer
                .ensure(&workspace.lfs_data_path(), arguments)
                .await?;
            if modified {
                // import new data that was just downloaded
                helper
                    .import_data(arguments.git_config_path.as_path())
                    .await?;
            } else {
                tracing::info!("No new files were added or removed, nothing to do")
            };

            tracing::info!("Synchronizing local raw-file mirror to downstream");
            execute(
                &raw_files::PushRawFiles {
                    url: downstream_url,
                    lfs_url: &lfs_url,
                    worktree: workspace.lfs_data_path().as_path(),
                    config_path: arguments.git_config_path.as_path(),
                },
                workspace.repository_path().as_path(),
            )
            .await?;
            // LFS breaks porcelain git output so we just fake it since we
            // only support syncing to a single branch anyway.
            Ok(MirrorStatus {
                push_refs: PushRefs(
                    vec![(
                        DEFAULT_BRANCH_NAME.to_string(),
                        String::from("raw files pushed successfully"),
                    )],
                    vec![],
                ),
                warnings: vec![],
            })
        }

        LorrySpec::Tarball(_) => unimplemented!("This will be removed soon"),
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use std::path::Path;

    use git2::Repository;
    use tempfile::tempdir;

    use crate::git_config::Config;
    use crate::test_server::{spawn_test_server, TestRepo, EXAMPLE_COMMIT};
    use crate::workspace::Workspace;

    /// Ref patterns select refs by full or short name, ignore patterns win
    /// over ref patterns, and patterns without any match are reported.
    #[test]
    fn test_parse_refs() {
        let (refs, missing) = parse_refs(
            &[
                String::from("refs/heads/master"),
                String::from("refs/tags/v1.0.0"),
                String::from("refs/tags/v1.0.1"),
                String::from("refs/tags/v1.0.2-rc1"),
                String::from("some-random-string"),
            ],
            Some(&[
                String::from("refs/heads/ma*"),
                String::from("v1.0.1"),
                String::from("refs/tags/v*"),
                String::from("notgonnamatch"),
            ]),
            // ignore RC releases
            Some(&[String::from("refs/tags/*-rc*")]),
        )
        .unwrap();
        assert!(refs.iter().any(|key| *key == "refs/heads/master"));
        assert!(refs.iter().any(|key| *key == "refs/tags/v1.0.0"));
        assert!(refs.iter().any(|key| *key == "refs/tags/v1.0.1"));
        assert!(!refs.iter().any(|key| *key == "some-random-string"));
        assert!(!refs.iter().any(|key| *key == "refs/tags/v1.0.2-rc1"));
        // exactly one pattern stayed without a match
        assert_eq!(missing, ["notgonnamatch"]);
    }

    /// End-to-end mirror of a git repository from a local upstream test
    /// server into an initially empty downstream test server.
    #[tokio::test]
    async fn test_try_mirror() {
        let upstream_repo = TestRepo((String::from("hello.git"), vec![EXAMPLE_COMMIT.to_string()]));
        let downstream_repo = TestRepo((String::from("hello.git"), vec![]));
        let test_dir = tempdir().unwrap();
        let mirror_workspace_dir = test_dir.path().join("workspace");
        let git_config_path = test_dir.path().join("gitconfig");
        let git_config = Config(git_config_path.to_path_buf());
        git_config
            .setup("hello@example.org", 1, false, Path::new("/dev/null"))
            .unwrap();
        let repos_upstream_dir = test_dir.path().join("repos_upstream");
        let repos_downstream_dir = test_dir.path().join("repos_downstream");
        let test_workspace =
            Workspace::new(test_dir.path().join("workspace").as_path(), "test_fetch");
        test_workspace.init_if_missing(false).unwrap();
        // Upstream server
        let upstream_address =
            spawn_test_server(repos_upstream_dir.as_path(), &[upstream_repo.clone()])
                .await
                .unwrap();
        // Downstream server
        let downstream_address =
            spawn_test_server(repos_downstream_dir.as_path(), &[downstream_repo.clone()])
                .await
                .unwrap();

        let workspace = Workspace::new(mirror_workspace_dir.as_path(), "test-repo");
        workspace.init_if_missing(false).unwrap();

        try_mirror(
            &LorrySpec::Git(SingleLorry {
                url: upstream_repo.address(&upstream_address).to_string(),
                ..Default::default()
            }),
            "test_lorry",
            &downstream_repo.address(&downstream_address),
            &workspace,
            &Arguments {
                working_area: test_dir.path().to_path_buf(),
                use_git_binary: Some(true),
                git_config_path,
                ..Default::default()
            },
        )
        .await
        .unwrap();

        // ensure that downstream contains the git from the upstream
        let repository = Repository::open_bare(repos_downstream_dir.join("hello.git")).unwrap();
        let mut walk = repository.revwalk().unwrap();
        walk.push_head().unwrap();
        let last_commit_id = walk.next().unwrap().unwrap();
        let last_commit = repository.find_commit(last_commit_id).unwrap();
        let last_commit_message = last_commit.message().unwrap();
        assert_eq!(last_commit_message, "Test Commit: 1/1");
    }
}