summaryrefslogtreecommitdiff
path: root/src/buildtool/file_system/file_storage.hpp
blob: 5748ae49cbe74ac9c95b2de781bf9be06bd38b98 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
// Copyright 2022 Huawei Cloud Computing Technology Co., Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef INCLUDED_SRC_BUILDTOOL_FILE_SYSTEM_FILE_STORAGE_HPP
#define INCLUDED_SRC_BUILDTOOL_FILE_SYSTEM_FILE_STORAGE_HPP

#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <string>
#include <type_traits>
#include <utility>

#include "src/buildtool/execution_api/common/ids.hpp"
#include "src/buildtool/file_system/file_system_manager.hpp"
#include "src/buildtool/file_system/object_type.hpp"
#include "src/buildtool/logging/log_level.hpp"
#include "src/buildtool/logging/logger.hpp"

enum class StoreMode : std::uint8_t {
    // First thread to write conflicting file wins.
    FirstWins,
    // Last thread to write conflicting file wins, effectively overwriting
    // existing entries. NOTE: This might cause races if hard linking from
    // stored files due to an issue with the interaction of rename(2) and
    // link(2) (see: https://stackoverflow.com/q/69076026/1107763).
    LastWins
};

struct FileStorageData final {
    /// Length of a subdirectory name.
    static constexpr size_t kDirectoryNameLength = 2;
};

template <ObjectType kType,
          StoreMode kMode,
          bool kSetEpochTime,
          class = std::enable_if_t<kType == ObjectType::File or
                                   kType == ObjectType::Executable>>
class FileStorage {
  public:
    explicit FileStorage(std::filesystem::path storage_root) noexcept
        : storage_root_{std::move(storage_root)} {}

    /// \brief Add file to storage.
    /// \returns true if file exists afterward.
    [[nodiscard]] auto AddFromFile(std::string const& id,
                                   std::filesystem::path const& source_path,
                                   bool is_owner = false) const noexcept
        -> bool {
        return AtomicAddFromFile(id, source_path, is_owner);
    }

    /// \brief Add bytes to storage.
    /// \returns true if file exists afterward.
    [[nodiscard]] auto AddFromBytes(std::string const& id,
                                    std::string const& bytes) const noexcept
        -> bool {
        return AtomicAddFromBytes(id, bytes);
    }

    /// \brief Determines the storage path of a blob identified by a hash value.
    /// The same sharding technique as used in git is applied, meaning, the hash
    /// value is separated into a directory part and file part. Two characters
    /// are used for the directory part, the rest for the file, which results in
    /// 256 possible directories.
    /// \param id       The hash value of the blob.
    /// \returns The sharded file path.
    [[nodiscard]] auto GetPath(std::string const& id) const noexcept
        -> std::filesystem::path {
        return storage_root_ /
               id.substr(0, FileStorageData::kDirectoryNameLength) /
               id.substr(FileStorageData::kDirectoryNameLength,
                         id.size() - FileStorageData::kDirectoryNameLength);
    }

    [[nodiscard]] auto StorageRoot() const noexcept
        -> std::filesystem::path const& {
        return storage_root_;
    }

  private:
    static constexpr bool kFdLess{kType == ObjectType::Executable};
    std::filesystem::path storage_root_;

    /// \brief Add file to storage from file path via link or copy and rename.
    /// If a race-condition occurs, the winning thread will be the one
    /// performing the link/rename operation first or last, depending on kMode
    /// being set to FirstWins or LastWins, respectively. All threads will
    /// signal success.
    /// \returns true if file exists afterward.
    [[nodiscard]] auto AtomicAddFromFile(std::string const& id,
                                         std::filesystem::path const& path,
                                         bool is_owner) const noexcept -> bool {
        auto file_path = GetPath(id);
        if ((kMode == StoreMode::LastWins or
             not FileSystemManager::Exists(file_path)) and
            FileSystemManager::CreateDirectory(file_path.parent_path())) {
            auto direct_create = kMode == StoreMode::FirstWins and is_owner;
            auto create_directly = [&]() {
                // Entry does not exist and we are owner of the file (e.g., file
                // generated in the execution directory). Try to hard link it
                // directly or check its existence if it was created by now.
                return FileSystemManager::CreateFileHardlinkAs<kType,
                                                               kSetEpochTime>(
                           path,
                           file_path,
                           /*log_failure_at=*/LogLevel::Debug) or
                       FileSystemManager::IsFile(file_path);
            };
            auto create_and_stage = [&]() {
                // Entry exists and we need to overwrite it, or we are not owner
                // of the file. Create the file in a process/thread-local
                // temporary path and stage it.
                auto unique_path = CreateUniquePath(file_path);
                return unique_path and
                       CreateFileFromPath(*unique_path, path, is_owner) and
                       StageFile(*unique_path, file_path);
            };
            if (direct_create ? create_directly() : create_and_stage()) {
                Logger::Log(
                    LogLevel::Trace, "created entry {}.", file_path.string());
                return true;
            }
        }
        return FileSystemManager::IsFile(file_path);
    }

    /// \brief Add file to storage from bytes via write and atomic rename.
    /// If a race-condition occurs, the winning thread will be the one
    /// performing the rename operation first or last, depending on kMode being
    /// set to FirstWins or LastWins, respectively. All threads will signal
    /// success.
    /// \returns true if file exists afterward.
    [[nodiscard]] auto AtomicAddFromBytes(
        std::string const& id,
        std::string const& bytes) const noexcept -> bool {
        auto file_path = GetPath(id);
        if (kMode == StoreMode::LastWins or
            not FileSystemManager::Exists(file_path)) {
            auto unique_path = CreateUniquePath(file_path);
            if (unique_path and
                FileSystemManager::CreateDirectory(file_path.parent_path()) and
                CreateFileFromBytes(*unique_path, bytes) and
                StageFile(*unique_path, file_path)) {
                Logger::Log(
                    LogLevel::Trace, "created entry {}.", file_path.string());
                return true;
            }
        }
        return FileSystemManager::IsFile(file_path);
    }

    /// \brief Create file from file path.
    [[nodiscard]] static auto CreateFileFromPath(
        std::filesystem::path const& file_path,
        std::filesystem::path const& other_path,
        bool is_owner) noexcept -> bool {
        // For files owned by us (e.g., generated files from the execution
        // directory), prefer faster creation of hard links instead of a copy.
        // Copy executables without opening any writeable file descriptors in
        // this process to avoid those from being inherited by child processes.
        return (is_owner and
                FileSystemManager::CreateFileHardlinkAs<kType, kSetEpochTime>(
                    other_path, file_path)) or
               FileSystemManager::CopyFileAs<kType, kSetEpochTime>(
                   other_path, file_path, kFdLess);
    }

    /// \brief Create file from bytes.
    [[nodiscard]] static auto CreateFileFromBytes(
        std::filesystem::path const& file_path,
        std::string const& bytes) noexcept -> bool {
        // Write executables without opening any writeable file descriptors in
        // this process to avoid those from being inherited by child processes.
        return FileSystemManager::WriteFileAs<kType, kSetEpochTime>(
            bytes, file_path, kFdLess);
    }

    /// \brief Stage file from source path to target path.
    [[nodiscard]] static auto StageFile(
        std::filesystem::path const& src_path,
        std::filesystem::path const& dst_path) noexcept -> bool {
        switch (kMode) {
            case StoreMode::FirstWins:
                // try rename source or delete it if the target already exists
                return FileSystemManager::Rename(
                           src_path, dst_path, /*no_clobber=*/true) or
                       (FileSystemManager::IsFile(dst_path) and
                        FileSystemManager::RemoveFile(src_path));
            case StoreMode::LastWins:
                return FileSystemManager::Rename(src_path, dst_path);
        }
    }
};

#endif  // INCLUDED_SRC_BUILDTOOL_FILE_SYSTEM_FILE_STORAGE_HPP