[intel-npu] Add functional tests to check correct memory deallocation (openvinotoolkit#25980)

### Details:
 - *The following new test cases are added (a condensed sketch of their shared pattern follows the list):*
``` 
1) allocate / de-allocate a model N times

2) allocate a model
   create / destroy an infer request N times
   de-allocate and destroy the model

3) allocate a model
   create an infer request
   set_tensor (with a new buffer each time) N times
   destroy the infer request
   de-allocate and destroy the model

4) allocate a model
   create an infer request
   local_tensor = infer_request.get_output_tensor()
   destroy the infer request     // the output tensor should not be deallocated here
   de-allocate and destroy the model
   destroy local_tensor          // the output tensor should be deallocated here
```
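
All four scenarios exercise the same measure / act / release / re-measure pattern against the `ov::intel_npu::device_alloc_mem_size` property. Below is a condensed sketch of scenario 1, assuming an NPU device and driver are available; the helper name and the plain-exception reporting are illustrative only, since the committed tests express the same checks with GTest assertions.

```cpp
// Condensed sketch of scenario 1 (compile / release a model N times and
// verify the device allocation returns to its baseline). Illustrative only.
#include <cstddef>
#include <memory>
#include <stdexcept>
#include <string>

#include "openvino/runtime/core.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"

void expect_no_leak_on_model_destruction(const std::string& target_device,
                                         const std::shared_ptr<ov::Model>& model) {
    ov::Core core;
    // Baseline: device memory currently allocated for this process.
    const auto baseline =
        core.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()).as<uint64_t>();

    for (size_t i = 0; i < 1000; ++i) {
        ov::CompiledModel compiled_model = core.compile_model(model, target_device);
        compiled_model = {};  // drop the last reference; the plugin should free device memory

        const auto current =
            core.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()).as<uint64_t>();
        if (current != baseline)
            throw std::runtime_error("device memory leaked at iteration " + std::to_string(i));
    }
}
```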

### Tickets:
 - *E129376*
MirceaDan99 authored Sep 6, 2024
1 parent 5f0fc54 commit 69180e2
Showing 8 changed files with 403 additions and 294 deletions.
@@ -5,6 +5,7 @@
#include <random>

#include "base/ov_behavior_test_utils.hpp"
#include "common/functions.h"
#include "common/npu_test_env_cfg.hpp"
#include "common_test_utils/node_builders/constant.hpp"
#include "graph_transformations.hpp"
@@ -22,22 +23,6 @@ namespace ov::test::behavior {
class DriverCompilerAdapterCustomStreamTestNPU : public ov::test::behavior::OVPluginTestBase,
public testing::WithParamInterface<CompilationParams> {
public:
std::shared_ptr<ov::Model> createModelWithLargeSize() {
auto data = std::make_shared<ov::opset11::Parameter>(ov::element::f16, ov::Shape{4000, 4000});
auto mul_constant = ov::opset11::Constant::create(ov::element::f16, ov::Shape{1}, {1.5});
auto mul = std::make_shared<ov::opset11::Multiply>(data, mul_constant);
auto add_constant = ov::opset11::Constant::create(ov::element::f16, ov::Shape{1}, {0.5});
auto add = std::make_shared<ov::opset11::Add>(mul, add_constant);
// Just a sample model; the long chain of Add ops below makes it large
for (int i = 0; i < 1000; i++) {
add = std::make_shared<ov::opset11::Add>(add, add_constant);
}
auto res = std::make_shared<ov::opset11::Result>(add);

/// Create the OpenVINO model
return std::make_shared<ov::Model>(ov::ResultVector{std::move(res)}, ov::ParameterVector{std::move(data)});
}

std::string generateRandomFileName() {
std::stringstream ss;
auto now = std::chrono::high_resolution_clock::now();
@@ -0,0 +1,65 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "overload/ov_plugin/core_integration.hpp"

using namespace ov::test::behavior;

namespace {

const char* NPU_PLUGIN_LIB_NAME = "openvino_intel_npu_plugin";

std::vector<std::string> devices = {
std::string(ov::test::utils::DEVICE_NPU),
};

std::pair<std::string, std::string> plugins[] = {
std::make_pair(std::string(NPU_PLUGIN_LIB_NAME), std::string(ov::test::utils::DEVICE_NPU)),
};

namespace OVClassBasicTestName {
static std::string getTestCaseName(testing::TestParamInfo<std::pair<std::string, std::string>> obj) {
std::ostringstream result;
result << "OVClassBasicTestName_" << obj.param.first << "_" << obj.param.second;
result << "_targetDevice=" << ov::test::utils::getTestsPlatformFromEnvironmentOr(ov::test::utils::DEVICE_NPU);

return result.str();
}
} // namespace OVClassBasicTestName

namespace OVClassNetworkTestName {
static std::string getTestCaseName(testing::TestParamInfo<std::string> obj) {
std::ostringstream result;
result << "OVClassNetworkTestName_" << obj.param;
result << "_targetDevice=" << ov::test::utils::getTestsPlatformFromEnvironmentOr(ov::test::utils::DEVICE_NPU);

return result.str();
}
} // namespace OVClassNetworkTestName

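// A single empty config: the suites below run with default plugin settings.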
const std::vector<ov::AnyMap> configs = {{}};

#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests_OVClassBasicTestP,
OVClassBasicTestPNPU,
::testing::ValuesIn(plugins),
OVClassBasicTestName::getTestCaseName);
#endif

INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests_OVClassNetworkTestP,
OVClassNetworkTestPNPU,
::testing::Combine(::testing::ValuesIn(devices), ::testing::ValuesIn(configs)),
OVClassNetworkTestPNPU::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests_OVClassLoadNetworkTest,
OVClassLoadNetworkTestNPU,
::testing::Combine(::testing::ValuesIn(devices), ::testing::ValuesIn(configs)),
OVClassLoadNetworkTestNPU::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(nightly_BehaviorTests_OVClassGetMetricTest,
OVClassGetMetricAndPrintNoThrow,
::testing::Values(ov::test::utils::DEVICE_NPU),
OVClassNetworkTestName::getTestCaseName);

} // namespace
17 changes: 17 additions & 0 deletions src/plugins/intel_npu/tests/functional/common/functions.cpp
@@ -5,6 +5,7 @@
#include "functions.h"
#include "common/npu_test_env_cfg.hpp"
#include "openvino/op/softmax.hpp"
#include "openvino/opsets/opset11.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"

std::shared_ptr<ov::Model> buildSingleLayerSoftMaxNetwork() {
@@ -26,6 +27,22 @@ std::shared_ptr<ov::Model> buildSingleLayerSoftMaxNetwork() {
return ov_model;
}

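// Builds a deliberately large model (a long chain of Add ops) so that
// per-model device allocations are easy to observe in the memory tests.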
std::shared_ptr<ov::Model> createModelWithLargeSize() {
auto data = std::make_shared<ov::opset11::Parameter>(ov::element::f16, ov::Shape{4000, 4000});
auto mul_constant = ov::opset11::Constant::create(ov::element::f16, ov::Shape{1}, {1.5});
auto mul = std::make_shared<ov::opset11::Multiply>(data, mul_constant);
auto add_constant = ov::opset11::Constant::create(ov::element::f16, ov::Shape{1}, {0.5});
auto add = std::make_shared<ov::opset11::Add>(mul, add_constant);
// Just a sample model; the long chain of Add ops below makes it large
for (int i = 0; i < 1000; i++) {
add = std::make_shared<ov::opset11::Add>(add, add_constant);
}
auto res = std::make_shared<ov::opset11::Result>(add);

/// Create the OpenVINO model
return std::make_shared<ov::Model>(ov::ResultVector{std::move(res)}, ov::ParameterVector{std::move(data)});
}

const std::string PlatformEnvironment::PLATFORM = []() -> std::string {
const auto& var = ov::test::utils::NpuTestEnvConfig::getInstance().IE_NPU_TESTS_PLATFORM;
if (!var.empty()) {
2 changes: 2 additions & 0 deletions src/plugins/intel_npu/tests/functional/common/functions.h
@@ -9,6 +9,8 @@
// create dummy network for tests
std::shared_ptr<ov::Model> buildSingleLayerSoftMaxNetwork();

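// create a large dummy model for memory-deallocation tests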
std::shared_ptr<ov::Model> createModelWithLargeSize();

// class encapsulated Platform getting from environmental variable
class PlatformEnvironment {
public:
@@ -7,9 +7,12 @@
#include <filesystem>
#include "base/ov_behavior_test_utils.hpp"
#include "behavior/ov_plugin/properties_tests.hpp"
#include "common/functions.h"
#include "common/utils.hpp"
#include "common/npu_test_env_cfg.hpp"
#include "common_test_utils/data_utils.hpp"
#include "common_test_utils/subgraph_builders/concat_with_params.hpp"
#include "common_test_utils/subgraph_builders/conv_pool_relu.hpp"
#include "common_test_utils/subgraph_builders/kso_func.hpp"
#include "common_test_utils/subgraph_builders/single_concat_with_constant.hpp"
#include "common_test_utils/subgraph_builders/split_conv_concat.hpp"
@@ -169,6 +172,238 @@ TEST(compatibility_OVClassBasicPropsTestNPU, smoke_SetConfigDevicePropertiesThro
ov::Exception);
}

//
// NPU-specific metrics
//

using OVClassGetMetricAndPrintNoThrow = OVClassBaseTestP;
TEST_P(OVClassGetMetricAndPrintNoThrow, DeviceAllocMemSizeLesserThanTotalMemSizeNPU) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ov::Core ie;
ov::Any p;

OV_ASSERT_NO_THROW(p = ie.get_property(target_device, ov::intel_npu::device_total_mem_size.name()));
uint64_t t = p.as<uint64_t>();
ASSERT_NE(t, 0);

OV_ASSERT_NO_THROW(p = ie.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()));
uint64_t a = p.as<uint64_t>();

ASSERT_LT(a, t);

std::cout << "OV NPU device alloc/total memory size: " << a << "/" << t << std::endl;
}

TEST_P(OVClassGetMetricAndPrintNoThrow, DeviceAllocMemSizeLesserAfterModelIsLoadedNPU) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ov::Core ie;
ov::Any p;

OV_ASSERT_NO_THROW(p = ie.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()));
uint64_t a1 = p.as<uint64_t>();

SKIP_IF_CURRENT_TEST_IS_DISABLED() {
auto model = ov::test::utils::make_conv_pool_relu();
OV_ASSERT_NO_THROW(ie.compile_model(model, target_device, {}));
}

OV_ASSERT_NO_THROW(p = ie.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()));
uint64_t a2 = p.as<uint64_t>();

std::cout << "OV NPU device {alloc before load network/alloc after load network} memory size: {" << a1 << "/" << a2
<< "}" << std::endl;

// after the network is loaded onto device, allocated memory value should increase
ASSERT_LE(a1, a2);
}

TEST_P(OVClassGetMetricAndPrintNoThrow, VpuDeviceAllocMemSizeLesserAfterModelIsLoaded) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ov::Core ie;
ov::Any p;

OV_ASSERT_NO_THROW(p = ie.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()));
uint64_t a1 = p.as<uint64_t>();

SKIP_IF_CURRENT_TEST_IS_DISABLED() {
auto model = ov::test::utils::make_conv_pool_relu();
OV_ASSERT_NO_THROW(ie.compile_model(model, target_device, ov::AnyMap{ov::log::level(ov::log::Level::DEBUG)}));
}

OV_ASSERT_NO_THROW(p = ie.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()));
uint64_t a2 = p.as<uint64_t>();

std::cout << "OV NPU device {alloc before load network/alloc after load network} memory size: {" << a1 << "/" << a2
<< "}" << std::endl;

// after the network is loaded onto device, allocated memory value should increase
ASSERT_LE(a1, a2);
}

TEST_P(OVClassGetMetricAndPrintNoThrow, VpuDeviceAllocMemSizeSameAfterDestroyCompiledModel) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ov::Core core;
ov::Any deviceAllocMemSizeAny;

auto model = createModelWithLargeSize();

OV_ASSERT_NO_THROW(deviceAllocMemSizeAny =
core.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()));
uint64_t deviceAllocMemSize = deviceAllocMemSizeAny.as<uint64_t>();
uint64_t deviceAllocMemSizeFinal;

for (size_t i = 0; i < 1000; ++i) {
ov::CompiledModel compiledModel;
OV_ASSERT_NO_THROW(compiledModel = core.compile_model(model, target_device));

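// Releasing the last handle to the compiled model should free its device memory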
compiledModel = {};

OV_ASSERT_NO_THROW(deviceAllocMemSizeAny =
core.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()));
deviceAllocMemSizeFinal = deviceAllocMemSizeAny.as<uint64_t>();
ASSERT_EQ(deviceAllocMemSize, deviceAllocMemSizeFinal) << " at iteration " << i;
}
}

TEST_P(OVClassGetMetricAndPrintNoThrow, VpuDeviceAllocMemSizeSameAfterDestroyInferRequest) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ov::Core core;
ov::Any deviceAllocMemSizeAny;

ov::CompiledModel compiledModel;
auto model = createModelWithLargeSize();

OV_ASSERT_NO_THROW(compiledModel = core.compile_model(model, target_device));

// After the memory-consumption updates, a first inference must be run before taking measurements
auto inferRequest = compiledModel.create_infer_request();
inferRequest.infer();
inferRequest = {};

OV_ASSERT_NO_THROW(deviceAllocMemSizeAny =
core.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()));
uint64_t deviceAllocMemSize = deviceAllocMemSizeAny.as<uint64_t>();
uint64_t deviceAllocMemSizeFinal;

for (size_t i = 0; i < 1000; ++i) {
inferRequest = compiledModel.create_infer_request();
inferRequest.infer();

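// Destroying the infer request should release its device buffers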
inferRequest = {};

OV_ASSERT_NO_THROW(deviceAllocMemSizeAny =
core.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()));
deviceAllocMemSizeFinal = deviceAllocMemSizeAny.as<uint64_t>();
ASSERT_EQ(deviceAllocMemSize, deviceAllocMemSizeFinal) << " at iteration " << i;
}
}

TEST_P(OVClassGetMetricAndPrintNoThrow, VpuDeviceAllocMemSizeSameAfterDestroyInferRequestSetTensor) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ov::Core core;
ov::Any deviceAllocMemSizeAny;

ov::CompiledModel compiledModel;
auto model = createModelWithLargeSize();

OV_ASSERT_NO_THROW(compiledModel = core.compile_model(model, target_device));

// After the memory-consumption updates, a first inference must be run before taking measurements
auto inferRequest = compiledModel.create_infer_request();
inferRequest.infer();
inferRequest = {};

OV_ASSERT_NO_THROW(deviceAllocMemSizeAny =
core.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()));
uint64_t deviceAllocMemSize = deviceAllocMemSizeAny.as<uint64_t>();
uint64_t deviceAllocMemSizeFinal;

for (size_t i = 0; i < 1000; ++i) {
auto inferRequest = compiledModel.create_infer_request();
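// Provide a brand-new host buffer each iteration; set_input_tensor() must not leak the previous one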
auto tensor = ov::Tensor(model->input(0).get_element_type(), model->input(0).get_shape());
ov::test::utils::fill_data_random(static_cast<ov::float16*>(tensor.data()), tensor.get_size());
inferRequest.set_input_tensor(tensor);
inferRequest.infer();

inferRequest = {};

OV_ASSERT_NO_THROW(deviceAllocMemSizeAny =
core.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()));
deviceAllocMemSizeFinal = deviceAllocMemSizeAny.as<uint64_t>();
ASSERT_EQ(deviceAllocMemSize, deviceAllocMemSizeFinal) << " at iteration " << i;
}
}

TEST_P(OVClassGetMetricAndPrintNoThrow, VpuDeviceAllocMemSizeSameAfterDestroyInferRequestGetTensor) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ov::Core core;
ov::Any deviceAllocMemSizeAny;

ov::CompiledModel compiledModel;
auto model = createModelWithLargeSize();

OV_ASSERT_NO_THROW(deviceAllocMemSizeAny =
core.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()));
uint64_t deviceAllocMemSize = deviceAllocMemSizeAny.as<uint64_t>();
uint64_t deviceAllocMemSizeFinal;

for (size_t i = 0; i < 1000; ++i) {
OV_ASSERT_NO_THROW(compiledModel = core.compile_model(model, target_device));

auto inferRequest = compiledModel.create_infer_request();
auto tensor = inferRequest.get_output_tensor();
inferRequest.infer();

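// The output tensor still holds a device allocation, so destroying the infer
// request and the compiled model must not free everything yet (ASSERT_LT below)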
inferRequest = {};
compiledModel = {};

OV_ASSERT_NO_THROW(deviceAllocMemSizeAny =
core.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()));
deviceAllocMemSizeFinal = deviceAllocMemSizeAny.as<uint64_t>();
ASSERT_LT(deviceAllocMemSize, deviceAllocMemSizeFinal) << " at iteration " << i;

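// Releasing the tensor should free the remaining device allocation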
tensor = {};

OV_ASSERT_NO_THROW(deviceAllocMemSizeAny =
core.get_property(target_device, ov::intel_npu::device_alloc_mem_size.name()));
deviceAllocMemSizeFinal = deviceAllocMemSizeAny.as<uint64_t>();
ASSERT_EQ(deviceAllocMemSize, deviceAllocMemSizeFinal) << " at iteration " << i;
}
}

TEST_P(OVClassGetMetricAndPrintNoThrow, DriverVersionNPU) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ov::Core ie;
ov::Any p;

OV_ASSERT_NO_THROW(p = ie.get_property(target_device, ov::intel_npu::driver_version.name()));
uint32_t t = p.as<uint32_t>();

std::cout << "NPU driver version is " << t << std::endl;

OV_ASSERT_PROPERTY_SUPPORTED(ov::intel_npu::driver_version.name());
}

using OVClassCompileModel = OVClassBaseTestP;
TEST_P(OVClassCompileModel, CompileModelWithDifferentThreadNumbers) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ov::Core ie;
ov::Any p;

auto model = ov::test::utils::make_conv_pool_relu();
OV_ASSERT_NO_THROW(ie.compile_model(model, target_device, {{ov::compilation_num_threads.name(), ov::Any(1)}}));

OV_ASSERT_NO_THROW(ie.compile_model(model, target_device, {{ov::compilation_num_threads.name(), ov::Any(2)}}));

OV_ASSERT_NO_THROW(ie.compile_model(model, target_device, {{ov::compilation_num_threads.name(), ov::Any(4)}}));

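// Negative values must be rejected: ov::compilation_num_threads accepts only positive int32 values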
EXPECT_ANY_THROW(ie.compile_model(model, target_device, {{ov::compilation_num_threads.name(), ov::Any(-1)}}));
OV_EXPECT_THROW(
std::ignore = ie.compile_model(model, target_device, {{ov::compilation_num_threads.name(), ov::Any(-1)}}),
::ov::Exception,
testing::HasSubstr("ov::compilation_num_threads must be positive int32 value"));
}

#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT

TEST_P(OVClassBasicTestPNPU, smoke_registerPluginsLibrariesUnicodePath) {