summaryrefslogtreecommitdiffstats
path: root/src/transfer.mli
blob: 61fdb300289439ee224692a39740117adeaacd49 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
(* Unison file synchronizer: src/transfer.mli *)
(* Copyright 1999-2018, Benjamin C. Pierce (see COPYING for details) *)

(*
   Rsync : general algorithm description

     The rsync algorithm is a technique for reducing the cost of a file
   transfer by avoiding the transfer of blocks that are already at the
   destination.
     Imagine we have source and destination computers that have files X and
   Y respectively, where X and Y are similar. The algorithm proceeds as
   follows :
   - The destination computer divides file Y into blocks of an agreed-upon
     size N.
   - For each block, the destination computer computes two functions of the
     block's contents :
       - A 128-bit fingerprint of the block, which with very high
         probability is different from the fingerprints of different blocks.
       - A small checksum, which can be computed in a "rolling" fashion.
         More precisely, if we are given the checksum for the N-byte block
         at offset k, and we are given the bytes at offsets k and N+k, we
         can efficiently compute the checksum for the N-byte block at offset
         k+1.
   - The destination computer sends a list of fingerprints and checksums to
     the source computer. Blocks are identified implicitly by the order in
     which they appear in the list.
   - The source computer searches through file X to identify blocks that
     have the same fingerprints as blocks that appear in the list sent
     from B. The checksums are used to find candidate blocks in a single
     pass through file X. Blocks with identical fingerprints are presumed
     to be identical.
   - The source computer sends instructions for reconstructing file X at the
     destination. These instructions avoid transmitting blocks of X that are
     identical to other blocks in Y by providing the numbers of identical
     blocks and the strings containing the differences.
*)


(* Transfer instruction giving data to build a file incrementally *)
type transfer_instruction = Bytearray.t * int * int

type transmitter = transfer_instruction -> unit Lwt.t


(*************************************************************************)
(* GENERIC TRANSMISSION                                                  *)
(*************************************************************************)

(* Send the whole source file encoded in transfer instructions *)
val send :
    in_channel             (* source file descriptor *)
 -> Uutil.Filesize.t       (* source file length *)
 -> (int -> unit)          (* progress report *)
 -> transmitter            (* transfer instruction transmitter *)
 -> unit Lwt.t

val receive :
    out_channel            (* destination file descriptor *)
 -> (int -> unit)          (* progress report *)
 -> transfer_instruction   (* transfer instruction received *)
 -> bool                   (* Whether we have reach the end of the file *)


(*************************************************************************)
(* RSYNC TRANSMISSION                                                    *)
(*************************************************************************)

module Rsync :
  sig

    (*** DESTINATION HOST ***)

    (* The rsync compression can only be activated when the file size is
       greater than the threshold *)
    val aboveRsyncThreshold : Uutil.Filesize.t -> bool

    (* Built from the old file by the destination computer *)
    type rsync_block_info

    (* Expected size of the [rsync_block_info] datastructure (in KiB). *)
    val memoryFootprint : Uutil.Filesize.t -> Uutil.Filesize.t -> int

    (* Compute block information from the old file *)
    val rsyncPreprocess :
           in_channel            (* old file descriptor *)
        -> Uutil.Filesize.t      (* source file length *)
        -> Uutil.Filesize.t      (* destination file length *)
        -> rsync_block_info * int

    (* Interpret a transfer instruction *)
    val rsyncDecompress :
           int                   (* block size *)
        -> in_channel            (* old file descriptor *)
        -> out_channel           (* output file descriptor *)
        -> (int -> unit)         (* progress report *)
        -> transfer_instruction  (* transfer instruction received *)
        -> bool

    (*** SOURCE HOST ***)

    (* Using block information, parse the new file and send transfer
       instructions accordingly *)
    val rsyncCompress :
           rsync_block_info
                              (* block info received from the destination *)
        -> in_channel         (* new file descriptor *)
        -> Uutil.Filesize.t   (* source file length *)
        -> (int -> unit)      (* progress report *)
        -> transmitter        (* transfer instruction transmitter *)
        -> unit Lwt.t

  end