diff options
-rw-r--r-- | src/utils/cpp/TARGETS | 9 | ||||
-rw-r--r-- | src/utils/cpp/incremental_reader.cpp | 139 | ||||
-rw-r--r-- | src/utils/cpp/incremental_reader.hpp | 132 |
3 files changed, 280 insertions, 0 deletions
diff --git a/src/utils/cpp/TARGETS b/src/utils/cpp/TARGETS index 6376d1b6..563e7825 100644 --- a/src/utils/cpp/TARGETS +++ b/src/utils/cpp/TARGETS @@ -119,4 +119,13 @@ , "hdrs": ["in_place_visitor.hpp"] , "stage": ["src", "utils", "cpp"] } +, "incremental_reader": + { "type": ["@", "rules", "CC", "library"] + , "name": ["incremental_reader"] + , "hdrs": ["incremental_reader.hpp"] + , "srcs": ["incremental_reader.cpp"] + , "deps": ["expected", ["@", "gsl", "", "gsl"]] + , "private-deps": ["in_place_visitor", ["@", "fmt", "", "fmt"]] + , "stage": ["src", "utils", "cpp"] + } } diff --git a/src/utils/cpp/incremental_reader.cpp b/src/utils/cpp/incremental_reader.cpp new file mode 100644 index 00000000..c1b5bc3a --- /dev/null +++ b/src/utils/cpp/incremental_reader.cpp @@ -0,0 +1,139 @@ +// Copyright 2025 Huawei Cloud Computing Technology Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "src/utils/cpp/incremental_reader.hpp" + +#include <cstdint> +#include <exception> +#include <string_view> + +#include "fmt/core.h" +#include "src/utils/cpp/in_place_visitor.hpp" + +namespace { +void DisposeFile(gsl::owner<std::FILE*> file) noexcept { + if (file == nullptr) { + return; + } + std::fclose(file); +} +} // namespace + +auto IncrementalReader::FromFile(std::size_t chunk_size, + std::filesystem::path const& path) noexcept + -> expected<IncrementalReader, std::string> { + if (chunk_size == 0) { + return unexpected<std::string>{ + "IncrementalReader: the chunk size cannot be 0"}; + } + + try { + // Ensure this is a file: + if (not std::filesystem::is_regular_file(path)) { + return unexpected{fmt::format( + "IncrementalReader: not a file :\n {} ", path.string())}; + } + + // Open file for reading: + static constexpr std::string_view kReadBinary = "rb"; + auto file = std::shared_ptr<std::FILE>{ + std::fopen(path.c_str(), kReadBinary.data()), ::DisposeFile}; + if (file == nullptr) { + return unexpected{ + fmt::format("IncrementalReader: failed to open the file:\n{}", + path.string())}; + } + + std::size_t const content_size = std::filesystem::file_size(path); + return IncrementalReader{chunk_size, + content_size, + std::move(file), + /*buffer=*/std::string(chunk_size, '\0')}; + } catch (std::exception const& e) { + return unexpected{fmt::format( + "IncrementalReader: While processing {}\ngot an exception: {}", + path.string(), + e.what())}; + } catch (...) { + return unexpected{fmt::format( + "IncrementalReader: While processing {}\ngot an unknown exception", + path.string())}; + } +} + +auto IncrementalReader::ReadChunk(std::size_t offset) const noexcept + -> expected<std::string_view, std::string> { + using Result = expected<std::string_view, std::string>; + InPlaceVisitor const visitor{ + [this, offset](FileSource const& file) -> Result { + return ReadFromFile(file, offset); + }, + }; + + try { + return std::visit(visitor, content_); + } catch (std::exception const& e) { + return unexpected{fmt::format( + "IncrementalReader: ReadChunk got an exception:\n{}", e.what())}; + } catch (...) { + return unexpected<std::string>{ + "IncrementalReader: ReadChunk got an unknown exception"}; + } +} + +auto IncrementalReader::ReadFromFile(FileSource const& file, std::size_t offset) + const -> expected<std::string_view, std::string> { + if (file == nullptr) { + return unexpected<std::string>{ + "IncrementalReader: ReadFromFile: got corrupted file"}; + } + + if (std::fseek(file.get(), gsl::narrow<std::int64_t>(offset), SEEK_SET) != + 0) { + return unexpected<std::string>{ + "IncrementalReader: ReadFromFile: failed to set offset"}; + } + + std::size_t read = 0; + while (std::feof(file.get()) == 0 and std::ferror(file.get()) == 0 and + read < buffer_.size()) { + read += std::fread( + &buffer_[read], sizeof(char), buffer_.size() - read, file.get()); + } + if (std::ferror(file.get()) != 0) { + return unexpected{ + fmt::format("IncrementalReader: ReadFromFile: ferror {}", + std::ferror(file.get()))}; + } + return std::string_view{buffer_.data(), read}; +} + +IncrementalReader::Iterator::Iterator( + gsl::not_null<IncrementalReader const*> const& owner, + std::size_t offset) noexcept + : owner_{owner}, offset_{offset} {} + +auto IncrementalReader::Iterator::operator*() const noexcept + -> expected<std::string_view, std::string> { + return owner_->ReadChunk(offset_); +} + +auto IncrementalReader::Iterator::operator++() noexcept + -> IncrementalReader::Iterator& { + offset_ += owner_->chunk_size_; + if (offset_ >= owner_->content_size_) { + offset_ = owner_->GetEndOffset(); + } + return *this; +} diff --git a/src/utils/cpp/incremental_reader.hpp b/src/utils/cpp/incremental_reader.hpp new file mode 100644 index 00000000..f8f6d32b --- /dev/null +++ b/src/utils/cpp/incremental_reader.hpp @@ -0,0 +1,132 @@ +// Copyright 2025 Huawei Cloud Computing Technology Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef INCLUDED_SRC_UTILS_CPP_INCREMENTAL_READER_HPP +#define INCLUDED_SRC_UTILS_CPP_INCREMENTAL_READER_HPP + +#include <algorithm> +#include <cstddef> +#include <cstdio> +#include <filesystem> +#include <iterator> +#include <memory> +#include <string> +#include <string_view> +#include <utility> +#include <variant> + +#include "gsl/gsl" +#include "src/utils/cpp/expected.hpp" + +/// \brief Read data from source incrementally chunk by chunk. +/// - Ensures that chunks are exactly the specified size if EOF is not reached. +/// - Ensures no allocations happen while reading. Uses pre-allocated buffer and +/// utilizes std::string_view. +/// - Guarantees to return at least one chunk for an empty source. +class IncrementalReader final { + class Iterator final { + public: + using value_type = expected<std::string_view, std::string>; + using pointer = value_type*; + using reference = value_type&; + using difference_type = std::ptrdiff_t; + using iterator_category = std::forward_iterator_tag; + + explicit Iterator(gsl::not_null<IncrementalReader const*> const& owner, + std::size_t offset) noexcept; + + auto operator*() const noexcept -> value_type; + auto operator++() noexcept -> Iterator&; + + [[nodiscard]] friend auto operator==(Iterator const& lhs, + Iterator const& rhs) noexcept + -> bool { + return lhs.owner_.get() == rhs.owner_.get() and + lhs.offset_ == rhs.offset_; + } + + [[nodiscard]] friend auto operator!=(Iterator const& lhs, + Iterator const& rhs) noexcept + -> bool { + return not(lhs == rhs); + } + + private: + // Store by pointer to allow copies: + gsl::not_null<IncrementalReader const*> owner_; + std::size_t offset_; + }; + + public: + /// \brief Create IncrementalReader that uses the given file as the source + /// of data. + /// \param chunk_size Size of chunk, must be greater than 0. + /// \param path File to read. + /// \return Configured reader on success or an error message on failure. + [[nodiscard]] static auto FromFile( + std::size_t chunk_size, + std::filesystem::path const& path) noexcept + -> expected<IncrementalReader, std::string>; + + [[nodiscard]] auto GetContentSize() const noexcept -> std::size_t { + return content_size_; + } + + /// \brief Create an iterator corresponding to the given offset. If the + /// offset exceeds the maximum content size, it is adjusted. + [[nodiscard]] auto make_iterator(std::size_t offset) const noexcept + -> Iterator { + return Iterator{this, std::min(offset, GetEndOffset())}; + } + + [[nodiscard]] auto begin() const& noexcept -> Iterator { + return make_iterator(/*offset=*/0); + } + + [[nodiscard]] auto end() const& noexcept -> Iterator { + return make_iterator(GetEndOffset()); + } + + private: + using FileSource = std::shared_ptr<std::FILE>; + using ContentSource = std::variant<FileSource>; + + std::size_t chunk_size_; + std::size_t content_size_; + ContentSource content_; + mutable std::string buffer_; + + explicit IncrementalReader(std::size_t chunk_size, + std::size_t content_size, + ContentSource content, + std::string buffer) noexcept + : chunk_size_{chunk_size}, + content_size_{content_size}, + content_{std::move(content)}, + buffer_{std::move(buffer)} {} + + [[nodiscard]] auto ReadChunk(std::size_t offset) const noexcept + -> expected<std::string_view, std::string>; + + [[nodiscard]] auto ReadFromFile(FileSource const& file, std::size_t offset) + const -> expected<std::string_view, std::string>; + + /// \brief Obtain offset corresponding to the end of content. The content + /// size is shifted by 1 character to properly handle empty sources. + [[nodiscard]] auto GetEndOffset() const noexcept -> std::size_t { + return content_size_ + 1; + } +}; + +#endif // INCLUDED_SRC_UTILS_CPP_INCREMENTAL_READER_HPP |