summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/utils/cpp/TARGETS 9
-rw-r--r-- src/utils/cpp/incremental_reader.cpp 139
-rw-r--r-- src/utils/cpp/incremental_reader.hpp 132
3 files changed, 280 insertions, 0 deletions
diff --git a/src/utils/cpp/TARGETS b/src/utils/cpp/TARGETS
index 6376d1b6..563e7825 100644
--- a/src/utils/cpp/TARGETS
+++ b/src/utils/cpp/TARGETS
@@ -119,4 +119,13 @@
, "hdrs": ["in_place_visitor.hpp"]
, "stage": ["src", "utils", "cpp"]
}
+, "incremental_reader":
+ { "type": ["@", "rules", "CC", "library"]
+ , "name": ["incremental_reader"]
+ , "hdrs": ["incremental_reader.hpp"]
+ , "srcs": ["incremental_reader.cpp"]
+ , "deps": ["expected", ["@", "gsl", "", "gsl"]]
+ , "private-deps": ["in_place_visitor", ["@", "fmt", "", "fmt"]]
+ , "stage": ["src", "utils", "cpp"]
+ }
}
diff --git a/src/utils/cpp/incremental_reader.cpp b/src/utils/cpp/incremental_reader.cpp
new file mode 100644
index 00000000..c1b5bc3a
--- /dev/null
+++ b/src/utils/cpp/incremental_reader.cpp
@@ -0,0 +1,139 @@
+// Copyright 2025 Huawei Cloud Computing Technology Co., Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utils/cpp/incremental_reader.hpp"
+
+#include <cstdint>
+#include <exception>
+#include <string_view>
+
+#include "fmt/core.h"
+#include "src/utils/cpp/in_place_visitor.hpp"
+
+namespace {
+// Deleter for the shared file handle; a null handle is silently ignored.
+void DisposeFile(gsl::owner<std::FILE*> file) noexcept {
+    if (file != nullptr) {
+        std::fclose(file);
+    }
+}
+}  // namespace
+
+// Checks the arguments, opens the file in binary mode, and records its size.
+// Exceptions raised inside the try block are turned into error messages.
+auto IncrementalReader::FromFile(std::size_t chunk_size,
+                                 std::filesystem::path const& path) noexcept
+    -> expected<IncrementalReader, std::string> {
+    using Failure = unexpected<std::string>;
+    if (chunk_size == 0) {
+        return Failure{"IncrementalReader: the chunk size cannot be 0"};
+    }
+    try {
+        // Only regular files are accepted:
+        if (not std::filesystem::is_regular_file(path)) {
+            return Failure{fmt::format(
+                "IncrementalReader: not a file :\n {} ", path.string())};
+        }
+
+        // DisposeFile closes the handle when the last owner goes away:
+        static constexpr std::string_view kMode = "rb";
+        std::shared_ptr<std::FILE> handle{
+            std::fopen(path.c_str(), kMode.data()), ::DisposeFile};
+        if (not handle) {
+            return Failure{
+                fmt::format("IncrementalReader: failed to open the file:\n{}",
+                            path.string())};
+        }
+        std::size_t const total_size = std::filesystem::file_size(path);
+        return IncrementalReader{chunk_size,
+                                 total_size,
+                                 std::move(handle),
+                                 /*buffer=*/std::string(chunk_size, '\0')};
+    } catch (std::exception const& ex) {
+        return Failure{fmt::format(
+            "IncrementalReader: While processing {}\ngot an exception: {}",
+            path.string(),
+            ex.what())};
+    } catch (...) {
+        return Failure{fmt::format(
+            "IncrementalReader: While processing {}\ngot an unknown exception",
+            path.string())};
+    }
+}
+
+// Dispatch on the content source; exceptions from the read become errors.
+auto IncrementalReader::ReadChunk(std::size_t offset) const noexcept
+    -> expected<std::string_view, std::string> {
+    using Chunk = expected<std::string_view, std::string>;
+    try {
+        InPlaceVisitor const dispatch{
+            [this, offset](FileSource const& source) -> Chunk {
+                return ReadFromFile(source, offset);
+            },
+        };
+        return std::visit(dispatch, content_);
+    } catch (std::exception const& ex) {
+        return unexpected{fmt::format(
+            "IncrementalReader: ReadChunk got an exception:\n{}", ex.what())};
+    } catch (...) {
+        return unexpected<std::string>{
+            "IncrementalReader: ReadChunk got an unknown exception"};
+    }
+}
+
+// Read at \p offset until buffer_ is full or EOF; the view aliases buffer_.
+auto IncrementalReader::ReadFromFile(FileSource const& file, std::size_t offset)
+    const -> expected<std::string_view, std::string> {
+    if (file == nullptr) {
+        return unexpected<std::string>{
+            "IncrementalReader: ReadFromFile: got corrupted file"};
+    }
+    // std::fseek takes a long, so narrow to exactly that type: an offset that
+    // does not fit throws (ReadChunk reports it) instead of being truncated.
+    if (std::fseek(file.get(), gsl::narrow<long>(offset), SEEK_SET) != 0) {
+        return unexpected<std::string>{
+            "IncrementalReader: ReadFromFile: failed to set offset"};
+    }
+    std::size_t read = 0;
+    while (std::feof(file.get()) == 0 and std::ferror(file.get()) == 0 and
+           read < buffer_.size()) {
+        read += std::fread(
+            &buffer_[read], sizeof(char), buffer_.size() - read, file.get());
+    }
+    if (std::ferror(file.get()) != 0) {
+        return unexpected{
+            fmt::format("IncrementalReader: ReadFromFile: ferror {}",
+                        std::ferror(file.get()))};
+    }
+    return std::string_view{buffer_.data(), read};
+}
+
+IncrementalReader::Iterator::Iterator(
+    gsl::not_null<IncrementalReader const*> const& owner,
+    std::size_t offset) noexcept
+    : owner_(owner), offset_(offset) {}  // Stores the position; reads nothing.
+
+auto IncrementalReader::Iterator::operator*() const noexcept -> value_type {
+    // Every dereference performs a fresh read at the stored offset.
+    return owner_.get()->ReadChunk(offset_);
+}
+
+auto IncrementalReader::Iterator::operator++() noexcept -> Iterator& {
+    // Step one chunk forward; once the content is exhausted, snap to the
+    // end offset so that the iterator compares equal to end().
+    auto const next = offset_ + owner_->chunk_size_;
+    bool const exhausted = next >= owner_->content_size_;
+    offset_ = exhausted ? owner_->GetEndOffset() : next;
+    return *this;
+}
diff --git a/src/utils/cpp/incremental_reader.hpp b/src/utils/cpp/incremental_reader.hpp
new file mode 100644
index 00000000..f8f6d32b
--- /dev/null
+++ b/src/utils/cpp/incremental_reader.hpp
@@ -0,0 +1,132 @@
+// Copyright 2025 Huawei Cloud Computing Technology Co., Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef INCLUDED_SRC_UTILS_CPP_INCREMENTAL_READER_HPP
+#define INCLUDED_SRC_UTILS_CPP_INCREMENTAL_READER_HPP
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdio>
+#include <filesystem>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <variant>
+
+#include "gsl/gsl"
+#include "src/utils/cpp/expected.hpp"
+
+/// \brief Read data from source incrementally chunk by chunk.
+/// - Ensures that chunks are exactly the specified size if EOF is not reached.
+/// - Ensures no allocations happen while reading. Uses pre-allocated buffer and
+/// utilizes std::string_view.
+/// - Guarantees to return at least one chunk for an empty source.
+class IncrementalReader final {
+ class Iterator final {  // Declared before "public:", i.e. a private nested type.
+ public:
+ using value_type = expected<std::string_view, std::string>;  // Chunk view on success, error message on failure.
+ using pointer = value_type*;
+ using reference = value_type&;  // NOTE(review): operator* returns value_type by value, not this reference type.
+ using difference_type = std::ptrdiff_t;
+ using iterator_category = std::forward_iterator_tag;  // NOTE(review): no operator++(int) is declared here.
+
+ explicit Iterator(gsl::not_null<IncrementalReader const*> const& owner,
+ std::size_t offset) noexcept;  // Binds the iterator to owner at the given offset.
+
+ auto operator*() const noexcept -> value_type;  // Reads the chunk at the current offset via ReadChunk.
+ auto operator++() noexcept -> Iterator&;  // Moves one chunk forward, or to the end offset.
+
+ [[nodiscard]] friend auto operator==(Iterator const& lhs,
+ Iterator const& rhs) noexcept
+ -> bool {  // Equal only for the same reader and the same offset.
+ return lhs.owner_.get() == rhs.owner_.get() and
+ lhs.offset_ == rhs.offset_;
+ }
+
+ [[nodiscard]] friend auto operator!=(Iterator const& lhs,
+ Iterator const& rhs) noexcept
+ -> bool {
+ return not(lhs == rhs);
+ }
+
+ private:
+ // Held as a non-null pointer rather than a reference to allow copies:
+ gsl::not_null<IncrementalReader const*> owner_;
+ std::size_t offset_;  // Offset of the chunk to read; GetEndOffset() marks the end.
+ };
+
+ public:
+ /// \brief Create IncrementalReader that uses the given file as the source
+ /// of data.
+ /// \param chunk_size Size of chunk, must be greater than 0.
+ /// \param path File to read.
+ /// \return Configured reader on success or an error message on failure.
+ [[nodiscard]] static auto FromFile(
+ std::size_t chunk_size,
+ std::filesystem::path const& path) noexcept
+ -> expected<IncrementalReader, std::string>;
+
+ [[nodiscard]] auto GetContentSize() const noexcept -> std::size_t {  // Size recorded when the reader was created.
+ return content_size_;
+ }
+
+ /// \brief Create an iterator corresponding to the given offset. Offsets
+ /// past the end are clamped, which yields the end() iterator.
+ [[nodiscard]] auto make_iterator(std::size_t offset) const noexcept
+ -> Iterator {
+ return Iterator{this, std::min(offset, GetEndOffset())};
+ }
+
+ [[nodiscard]] auto begin() const& noexcept -> Iterator {  // Offset 0; differs from end() even for empty content.
+ return make_iterator(/*offset=*/0);
+ }
+
+ [[nodiscard]] auto end() const& noexcept -> Iterator {  // Iterator positioned at GetEndOffset().
+ return make_iterator(GetEndOffset());
+ }
+
+ private:
+ using FileSource = std::shared_ptr<std::FILE>;  // Closed by the deleter installed in FromFile.
+ using ContentSource = std::variant<FileSource>;  // Currently a file is the only kind of source.
+
+ std::size_t chunk_size_;  // Requested chunk size; FromFile rejects 0.
+ std::size_t content_size_;  // Total size of the source, as reported at creation.
+ ContentSource content_;
+ mutable std::string buffer_;  // Reused by every read, hence mutable; chunk views point into it.
+
+ explicit IncrementalReader(std::size_t chunk_size,
+ std::size_t content_size,
+ ContentSource content,
+ std::string buffer) noexcept
+ : chunk_size_{chunk_size},
+ content_size_{content_size},
+ content_{std::move(content)},
+ buffer_{std::move(buffer)} {}
+
+ [[nodiscard]] auto ReadChunk(std::size_t offset) const noexcept
+ -> expected<std::string_view, std::string>;  // Exceptions from the read are reported as errors.
+
+ [[nodiscard]] auto ReadFromFile(FileSource const& file, std::size_t offset)
+ const -> expected<std::string_view, std::string>;  // Not noexcept; called through ReadChunk.
+
+ /// \brief Obtain offset corresponding to the end of content. The content
+ /// size is shifted by 1 character to properly handle empty sources.
+ [[nodiscard]] auto GetEndOffset() const noexcept -> std::size_t {
+ return content_size_ + 1;
+ }
+};
+
+#endif // INCLUDED_SRC_UTILS_CPP_INCREMENTAL_READER_HPP