diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml index 72243d5a6ca..8b32d9b33fa 100644 --- a/.github/workflows/canary.yml +++ b/.github/workflows/canary.yml @@ -54,7 +54,7 @@ jobs: - name: Checkout Oneflow-Inc/oneflow if: ${{ github.event.inputs.oneflow-ref == '' }} uses: actions/checkout@v2 - - uses: Oneflow-Inc/get-oneflow@support-cu12 + - uses: Oneflow-Inc/get-oneflow@add-nightly-date name: Build manylinux id: build-cuda with: @@ -72,8 +72,8 @@ jobs: clean-ccache: true compute-platform: ${{ env.COMPUTE_PLATFORM }} python-versions: | + 3.7 3.8 - 3.10 - name: Upload wheelhouse uses: ./.github/actions/upload_oss with: diff --git a/.github/workflows/on_merge.yml b/.github/workflows/on_merge.yml index 6cf96474110..f92f8e42a44 100644 --- a/.github/workflows/on_merge.yml +++ b/.github/workflows/on_merge.yml @@ -15,6 +15,6 @@ jobs: if: github.event.pull_request.merged == true runs-on: ubuntu-latest steps: - - uses: Oneflow-Inc/get-oneflow/update-benchmark-history@support-cu12 + - uses: Oneflow-Inc/get-oneflow/update-benchmark-history@add-nightly-date name: Update benchmark history timeout-minutes: 10 diff --git a/.github/workflows/priv_release.yml b/.github/workflows/priv_release.yml deleted file mode 100644 index 3d59b751f53..00000000000 --- a/.github/workflows/priv_release.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: Priv Release - -on: - push: - branches: - - priv-release - - add-cu12-release - schedule: - # beijing: 12 pm. - # utc: 4 am. - - cron: "0 4 * * *" - workflow_dispatch: - inputs: - priv_branch: - required: false - default: "main" - -concurrency: - group: priv-release-${{ github.ref }} - cancel-in-progress: true - -jobs: - release: - name: Release pip - permissions: - contents: read - pull-requests: write - uses: ./.github/workflows/release.yml - with: - is_priv: true - branch: ${{ inputs.priv_branch || 'main' }} - secrets: - ONEFLOW_PRIV_ORG: ${{ secrets.ONEFLOW_PRIV_ORG }} - ONEFLOW_PRIV_GH_TOKEN: ${{ secrets.ONEFLOW_PRIV_GH_TOKEN }} - ONEFLOW_PRIV_OSS_BUCKET: ${{ secrets.ONEFLOW_PRIV_OSS_BUCKET }} - OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }} - OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0bb05ee0eb8..d90504d8d11 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -14,26 +14,6 @@ on: placeholder: description: "update .github/workflows/release.yml to config your build" required: false - workflow_call: - inputs: - is_priv: - required: true - type: boolean - branch: - required: false - type: string - default: "main" - secrets: - ONEFLOW_PRIV_ORG: - required: true - ONEFLOW_PRIV_GH_TOKEN: - required: true - ONEFLOW_PRIV_OSS_BUCKET: - required: true - OSS_ACCESS_KEY_ID: - required: true - OSS_ACCESS_KEY_SECRET: - required: true concurrency: group: release-${{ github.ref }} cancel-in-progress: ${{ github.ref != 'refs/heads/master' }} @@ -51,19 +31,11 @@ jobs: steps: - name: Checkout Oneflow-Inc/oneflow uses: actions/checkout@v2 - if: ${{ !inputs.is_priv }} with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - name: Checkout oneflow - uses: actions/checkout@v2 - if: ${{ inputs.is_priv }} - with: - ref: ${{ inputs.branch }} - repository: ${{ secrets.ONEFLOW_PRIV_ORG }}/oneflow - token: ${{ secrets.ONEFLOW_PRIV_GH_TOKEN }} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@support-cu12 - name: Find build cache + - uses: 
Oneflow-Inc/get-oneflow/cache-complete/matrix/build@add-nightly-date + name: find cache id: find-cache timeout-minutes: 5 with: @@ -74,9 +46,9 @@ jobs: release oneflow-src: ${{ env.ONEFLOW_SRC }} entries: | - cu122 - cu121 cu118 + cu117 + cu116 cpu - name: Get current date id: date @@ -87,17 +59,14 @@ jobs: MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/release/${{ matrix.entry }} WHEELHOUSE_DIR: manylinux_wheelhouse OSS_DIR: branch/${{ github.ref_name }}/${{ matrix.entry }}/${{ github.sha }} - GITHUB_REF_NAME: ${{ github.ref_name }} - GITHUB_SHA: ${{ github.sha }} - ONEFLOW_OSS_BUCKET: oneflow-staging needs: [generate-build-matrix] name: Staging Release timeout-minutes: 180 runs-on: [self-hosted, linux, release] - if: github.repository == 'Oneflow-Inc/oneflow' || inputs.is_priv + if: github.repository == 'Oneflow-Inc/oneflow' strategy: fail-fast: false - max-parallel: 6 + max-parallel: 5 matrix: ${{ fromJson(needs.generate-build-matrix.outputs.matrix) }} steps: - name: Fix permissions @@ -108,66 +77,10 @@ jobs: python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple python3 -m pip install -U setuptools wheel --user python3 -m pip install oss2 --user - - name: Checkout Oneflow-Inc/oneflow - uses: actions/checkout@v2 - if: ${{ !inputs.is_priv }} - with: - ref: ${{ github.event.pull_request.head.sha }} - repository: ${{github.event.pull_request.head.repo.full_name}} - - name: Checkout private oneflow - uses: actions/checkout@v2 - if: ${{ inputs.is_priv }} - with: - ref: ${{ inputs.branch }} - repository: ${{ secrets.ONEFLOW_PRIV_ORG }}/oneflow - token: ${{ secrets.ONEFLOW_PRIV_GH_TOKEN }} - - name: Checkout cutlass_extension - uses: actions/checkout@v2 - if: ${{ inputs.is_priv }} - with: - repository: ${{ secrets.ONEFLOW_PRIV_ORG }}/cutlass-extension - token: ${{ secrets.ONEFLOW_PRIV_GH_TOKEN }} - path: cutlass-extension - - name: Set Private env - if: ${{ inputs.is_priv }} - run: | - GITHUB_SHA=$(git rev-parse HEAD) - echo "OSS_DIR=branch/${{ inputs.branch }}/${{ matrix.entry }}/${GITHUB_SHA}" >> $GITHUB_ENV - echo "GITHUB_REF_NAME=${{ inputs.branch }}" >> $GITHUB_ENV - echo "GITHUB_SHA=${GITHUB_SHA}" >> $GITHUB_ENV - echo "ONEFLOW_OSS_BUCKET=${{ secrets.ONEFLOW_PRIV_OSS_BUCKET }}" >> $GITHUB_ENV - - name: Print env - if: ${{ inputs.is_priv }} - run: | - env - - uses: Oneflow-Inc/get-oneflow@support-cu12 - name: Build ${{ matrix.entry }} - if: ${{ matrix.entry =='cu118' || startsWith(matrix.entry, 'cu12') }} - with: - cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/release/cu118.cmake - build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc9.sh - oneflow-src: ${{ env.ONEFLOW_SRC }} - oneflow-build-env: manylinux - wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} - clear-wheelhouse-dir: true - self-hosted: true - compute-platform: ${{ matrix.entry }} - manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} - docker-run-use-system-http-proxy: false - docker-run-use-lld: false - retry-failed-build: true - clean-ccache: true - nightly: ${{ github.event_name == 'schedule' || github.ref == 'refs/heads/release/add_nightly_date_index'}} - nightly-date: ${{ needs.generate-build-matrix.outputs.formatted_date }} - use-nvidia-wheels: ${{ matrix.entry !='cu112' }} - python-versions: | - 3.7 - 3.8 - 3.9 - 3.10 - - uses: Oneflow-Inc/get-oneflow@support-cu12 + - uses: actions/checkout@v2 + - uses: Oneflow-Inc/get-oneflow@add-nightly-date name: Build ${{ matrix.entry }} - if: ${{ startsWith(matrix.entry, 'cu') && matrix.entry !='cu118' && !startsWith(matrix.entry, 'cu12') }} + 
if: ${{ matrix.entry !='cpu' }} with: cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/release/cuda.cmake build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc9.sh @@ -190,7 +103,7 @@ jobs: 3.8 3.9 3.10 - - uses: Oneflow-Inc/get-oneflow@support-cu12 + - uses: Oneflow-Inc/get-oneflow@add-nightly-date name: Build ${{ matrix.entry }} if: ${{ matrix.entry =='cpu' }} with: @@ -218,7 +131,7 @@ jobs: uses: ./.github/actions/upload_oss with: src_path: ${{ env.WHEELHOUSE_DIR }} - oss_dst_path: oss://${{ env.ONEFLOW_OSS_BUCKET }}/${{ env.OSS_DIR }} + oss_dst_path: oss://oneflow-staging/${{ env.OSS_DIR }} oss_access_key_id: ${{ secrets.OSS_ACCESS_KEY_ID }} oss_access_key_secret: ${{ secrets.OSS_ACCESS_KEY_SECRET }} - name: Update pip index @@ -228,13 +141,13 @@ jobs: run: | python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple python3 -m pip install oss2 beautifulsoup4 --user - python3 tools/create_pip_index.py --dir_key ${{ env.OSS_DIR }} -b ${{ env.ONEFLOW_OSS_BUCKET }} \ - --index_key=branch/${{ env.GITHUB_REF_NAME }}/${{ matrix.entry }}/index.html \ - --index_key=branch/${{ env.GITHUB_REF_NAME }}/date/${{ needs.generate-build-matrix.outputs.formatted_date }}/${{ matrix.entry }}/index.html \ + python3 tools/create_pip_index.py --dir_key ${{ env.OSS_DIR }} -b oneflow-staging \ + --index_key=branch/${{ github.ref_name }}/${{ matrix.entry }}/index.html \ + --index_key=branch/${{ github.ref_name }}/date/${{ needs.generate-build-matrix.outputs.formatted_date }}/${{ matrix.entry }}/index.html \ --index_key=${{ env.OSS_DIR }}/index.html \ - --index_key=commit/${{ env.GITHUB_SHA }}/${{ matrix.entry }}/index.html + --index_key=commit/${{ github.sha }}/${{ matrix.entry }}/index.html - name: Update API docs - if: github.ref == 'refs/heads/master' && matrix.entry == 'cpu' && !inputs.is_priv + if: github.ref == 'refs/heads/master' && matrix.entry == 'cpu' env: READTHEDOCS_TOKEN: ${{ secrets.READTHEDOCS_TOKEN }} run: | diff --git a/.github/workflows/simple.yml b/.github/workflows/simple.yml index 3668e422154..036b0330557 100644 --- a/.github/workflows/simple.yml +++ b/.github/workflows/simple.yml @@ -244,7 +244,7 @@ jobs: repository: Oneflow-Inc/conda-env ref: 30a7f00eb48ee9009d85a848e720823e5054c66b path: conda-env - - uses: Oneflow-Inc/get-oneflow@support-cu12 + - uses: Oneflow-Inc/get-oneflow@add-nightly-date name: Build with gcc7 if: ${{ matrix.build-type == 'gcc7'}} with: @@ -253,7 +253,7 @@ jobs: oneflow-build-env: conda conda-env-file: conda-env/dev/gcc7/environment-v2.yml conda-env-name: oneflow-dev-gcc7-v2 - - uses: Oneflow-Inc/get-oneflow@support-cu12 + - uses: Oneflow-Inc/get-oneflow@add-nightly-date name: Build with clang10 if: ${{ matrix.build-type == 'clang10'}} with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8ee44d87762..8463bc730bb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -176,7 +176,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@support-cu12 + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@add-nightly-date name: find cache id: find-cache timeout-minutes: 5 @@ -219,7 +219,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete@support-cu12 + - uses: Oneflow-Inc/get-oneflow/cache-complete@add-nightly-date name: Save 
cache if successful id: save-cache timeout-minutes: 5 @@ -233,7 +233,7 @@ jobs: run: | echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" exit 1 - - uses: Oneflow-Inc/get-oneflow@support-cu12 + - uses: Oneflow-Inc/get-oneflow@add-nightly-date name: Build manylinux ${{ matrix.entry }} id: build-cpu if: ${{ matrix.entry =='cpu' && !matrix.cache-hit }} @@ -255,7 +255,7 @@ jobs: python-versions: | 3.7 3.8 - - uses: Oneflow-Inc/get-oneflow@support-cu12 + - uses: Oneflow-Inc/get-oneflow@add-nightly-date name: Build manylinux ${{ matrix.entry }} id: build-cpu-sanitizers if: ${{ (matrix.entry == 'cpu-asan-ubsan' || matrix.entry == 'cpu-tsan') && !matrix.cache-hit && false }} @@ -276,7 +276,7 @@ jobs: clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }} python-versions: | 3.8 - - uses: Oneflow-Inc/get-oneflow@support-cu12 + - uses: Oneflow-Inc/get-oneflow@add-nightly-date name: Build manylinux ${{ matrix.entry }} id: build-cuda if: ${{ matrix.entry =='cu116' && !matrix.cache-hit }} @@ -296,7 +296,7 @@ jobs: clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }} python-versions: | 3.7 - - uses: Oneflow-Inc/get-oneflow@support-cu12 + - uses: Oneflow-Inc/get-oneflow@add-nightly-date name: Build ${{ matrix.entry }} if: ${{ matrix.entry == 'llvm15' && !matrix.cache-hit }} with: @@ -335,7 +335,7 @@ jobs: }) - name: Upload packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm15' && matrix.entry != 'cpu-asan-ubsan' && matrix.entry != 'cpu-tsan' }} - uses: Oneflow-Inc/get-oneflow/digest/upload@support-cu12 + uses: Oneflow-Inc/get-oneflow/digest/upload@add-nightly-date timeout-minutes: 10 with: digest: ${{ steps.save-cache.outputs.build-digest }} @@ -346,7 +346,7 @@ jobs: dst-dir: cpack - name: Upload whl if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm15' && matrix.entry != 'cpu-asan-ubsan' && matrix.entry != 'cpu-tsan' }} - uses: Oneflow-Inc/get-oneflow/digest/upload@support-cu12 + uses: Oneflow-Inc/get-oneflow/digest/upload@add-nightly-date timeout-minutes: 10 with: digest: ${{ steps.save-cache.outputs.build-digest }} @@ -371,7 +371,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@support-cu12 + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@add-nightly-date name: find cache id: find-cache timeout-minutes: 5 @@ -402,7 +402,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@support-cu12 + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@add-nightly-date name: find cache id: find-cache timeout-minutes: 5 @@ -484,7 +484,7 @@ jobs: if: ${{ contains(matrix.runs-on, 'self-hosted') }} run: | docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true - - uses: Oneflow-Inc/get-oneflow/cache-complete@support-cu12 + - uses: Oneflow-Inc/get-oneflow/cache-complete@add-nightly-date name: Save cache if successful id: save-cache timeout-minutes: 5 @@ -500,7 +500,7 @@ jobs: exit 1 - name: Download wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow/digest/download@support-cu12 + uses: Oneflow-Inc/get-oneflow/digest/download@add-nightly-date id: download-digest timeout-minutes: 10 with: @@ 
-510,7 +510,7 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} - name: Get primary node if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow/master-address@support-cu12 + uses: Oneflow-Inc/get-oneflow/master-address@add-nightly-date id: get-primary-node with: rank: ${{ matrix.rank }} @@ -710,7 +710,7 @@ jobs: if: ${{ contains(matrix.runs-on, 'self-hosted') }} run: | docker rm -f ${{ env.TEST_MANYLINUX_CONTAINER_NAME }} || true - - uses: Oneflow-Inc/get-oneflow/cache-complete@support-cu12 + - uses: Oneflow-Inc/get-oneflow/cache-complete@add-nightly-date name: Save cache if successful id: save-cache timeout-minutes: 5 @@ -726,7 +726,7 @@ jobs: exit 1 - name: Download wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow/digest/download@support-cu12 + uses: Oneflow-Inc/get-oneflow/digest/download@add-nightly-date id: download-digest timeout-minutes: 10 with: @@ -736,7 +736,7 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} - name: Download ASAN and UBSAN wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && matrix.device == 'cpu' && false }} - uses: Oneflow-Inc/get-oneflow/digest/download@support-cu12 + uses: Oneflow-Inc/get-oneflow/digest/download@add-nightly-date id: asan-ubsan-download-digest timeout-minutes: 10 with: @@ -746,7 +746,7 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} - name: Download TSAN wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && matrix.device == 'cpu' && false }} - uses: Oneflow-Inc/get-oneflow/digest/download@support-cu12 + uses: Oneflow-Inc/get-oneflow/digest/download@add-nightly-date id: tsan-download-digest timeout-minutes: 10 with: @@ -934,7 +934,7 @@ jobs: issue_number: context.issue.number, owner: context.repo.owner, repo: context.repo.repo, - body: "View latest API docs preview at: https://oneflow-staging.oss-cn-beijing.aliyuncs.com/${{ env.DOCS_PATH }}/" + body: "View latest API docs preview at: https://staging.oneflow.info/${{ env.DOCS_PATH }}/" }) - name: Doctest timeout-minutes: 45 @@ -1072,7 +1072,7 @@ jobs: - name: Benchmark Test timeout-minutes: 100 if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'benchmark' && matrix.device == 'cuda' }} - uses: Oneflow-Inc/get-oneflow/pytest-benchmark@support-cu12 + uses: Oneflow-Inc/get-oneflow/pytest-benchmark@add-nightly-date with: collect-path: ${{ env.FLOW_VISION_SRC }}/benchmark container-name: ${{ env.TEST_CONTAINER_NAME }} @@ -1133,7 +1133,7 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} fetch-depth: 0 - - uses: Oneflow-Inc/get-oneflow/cache-complete@support-cu12 + - uses: Oneflow-Inc/get-oneflow/cache-complete@add-nightly-date name: Save cache if successful id: save-cache timeout-minutes: 5 diff --git a/README.md b/README.md index 4d4b127ed3e..e01af3989f1 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # OneFlow OneFlow is a deep learning framework designed to be **user-friendly, scalable and efficient**. 
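
> Reviewer note (hedged; `ref_name`, `entry`, `sha`, and `date` below are illustrative placeholders, not values from this diff): the `--index_key` arguments passed to `tools/create_pip_index.py` in the release workflow above make each build reachable by branch, by date, and by commit, which is the same layout the README install URLs that follow rely on. A minimal sketch of that key scheme:

```python
# Sketch of the pip index layout implied by the --index_key arguments
# in release.yml: branch/<ref>/<entry>, branch/<ref>/date/<date>/<entry>,
# <oss_dir>, and commit/<sha>/<entry>. All argument values are placeholders.
def index_keys(ref_name: str, entry: str, sha: str, date: str, oss_dir: str):
    return [
        f"branch/{ref_name}/{entry}/index.html",
        f"branch/{ref_name}/date/{date}/{entry}/index.html",
        f"{oss_dir}/index.html",
        f"commit/{sha}/{entry}/index.html",
    ]

print(index_keys("master", "cu117", "abc123", "20230101",
                 "branch/master/cu117/abc123"))
```
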
With OneFlow, it is easy to: - - program a model with [**PyTorch-like API**](https://oneflow.readthedocs.io/en/master/) - scale a model to n-dimensional-parallel execution with the [**Global Tensor**](https://docs.oneflow.org/en/master/cookies/global_tensor.html) - accelerate/deploy a model with the [**Graph Compiler**](https://oneflow.readthedocs.io/en/master/graph.html). @@ -61,23 +60,24 @@ OneFlow is a deep learning framework designed to be **user-friendly, scalable an - To install nightly release of OneFlow with CUDA support: ```bash - python3 -m pip install --pre oneflow -f https://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/master/cu118 + python3 -m pip install --pre oneflow -f https://staging.oneflow.info/branch/master/cu117 ``` - To install other available builds for different variants: - Stable ```bash - python3 -m pip install --find-links https://release.oneflow.info oneflow==0.9.0+cu118 + python3 -m pip install --find-links https://release.oneflow.info oneflow==0.9.0+cu117 ``` - Nightly ``` - python3 -m pip install --pre oneflow -f https://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/master/[PLATFORM] + python3 -m pip install --pre oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] ``` - All available `[PLATFORM]`: | Platform |CUDA Driver Version| Supported GPUs | |---|---|---| - | cu118 | >= 450.80.02 | GTX 10xx, RTX 20xx, A100, RTX 30xx | + | cu117 | >= 450.80.02 | GTX 10xx, RTX 20xx, A100, RTX 30xx | + | cu102 | >= 440.33 | GTX 10xx, RTX 20xx | | cpu | N/A | N/A | - If you are in China, you could run this to have pip download packages from domestic mirror of pypi: diff --git a/cmake/caches/ci/release/cu118.cmake b/cmake/caches/ci/release/cu118.cmake deleted file mode 100644 index 270afb4409e..00000000000 --- a/cmake/caches/ci/release/cu118.cmake +++ /dev/null @@ -1,17 +0,0 @@ -set(BUILD_CUDA YES CACHE BOOL "") -set(BUILD_GIT_VERSION YES CACHE BOOL "") -set(BUILD_TESTING OFF CACHE BOOL "") -set(BUILD_RDMA YES CACHE BOOL "") -set(TREAT_WARNINGS_AS_ERRORS YES CACHE BOOL "") -set(THIRD_PARTY_MIRROR aliyun CACHE STRING "") -set(PIP_INDEX_MIRROR "https://pypi.tuna.tsinghua.edu.cn/simple" CACHE STRING "") -set(CMAKE_BUILD_TYPE Release CACHE STRING "") -set(CMAKE_GENERATOR Ninja CACHE STRING "") -set(CMAKE_CUDA_ARCHITECTURES "70-real;80-real;86-real;89-real;90-real" CACHE STRING "") -set(CUDNN_STATIC OFF CACHE BOOL "") -set(WITH_MLIR ON CACHE BOOL "") -set(BUILD_CPP_API OFF CACHE BOOL "") -set(CUDA_NVCC_THREADS_NUMBER 2 CACHE STRING "") -set(CMAKE_C_COMPILER_LAUNCHER ccache CACHE STRING "") -set(CMAKE_CXX_COMPILER_LAUNCHER ccache CACHE STRING "") -set(CMAKE_CUDA_COMPILER_LAUNCHER ccache CACHE STRING "") diff --git a/docs/requirements.txt b/docs/requirements.txt index 7471b37f813..a72b2dda50c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -5,5 +5,5 @@ furo==2021.4.11b34 sphinx-copybutton==0.5.0 # above are dev dependencies --pre ---find-links https://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/master/cpu +--find-links https://staging.oneflow.info/branch/master/cpu oneflow diff --git a/docs/source/nn.functional.rst b/docs/source/nn.functional.rst index 6ba341196cb..971dc768749 100644 --- a/docs/source/nn.functional.rst +++ b/docs/source/nn.functional.rst @@ -75,7 +75,6 @@ Non-linear activation functions selu celu leaky_relu - square_relu prelu glu gelu diff --git a/docs/source/nn.rst b/docs/source/nn.rst index 62e52657e86..8cd49bc1d49 100644 --- a/docs/source/nn.rst +++ b/docs/source/nn.rst @@ -160,7 +160,6 @@ Non-linear Activations 
(weighted sum, nonlinearity) nn.CELU nn.GELU nn.QuickGELU - nn.SquareReLU nn.SiLU nn.Sigmoid nn.Mish diff --git a/docs/source/oneflow.rst b/docs/source/oneflow.rst index 592d3bd6081..972963909bf 100644 --- a/docs/source/oneflow.rst +++ b/docs/source/oneflow.rst @@ -270,7 +270,6 @@ Pointwise Ops fmod gelu quick_gelu - square_relu log log1p log2 diff --git a/oneflow/api/python/framework/device.cpp b/oneflow/api/python/framework/device.cpp index df7278a2dd1..445b953aac4 100644 --- a/oneflow/api/python/framework/device.cpp +++ b/oneflow/api/python/framework/device.cpp @@ -31,10 +31,9 @@ ONEFLOW_API_PYBIND11_MODULE("", m) { .def(py::init([](const std::string& type_or_type_with_device_id) { return Device::ParseAndNew(type_or_type_with_device_id).GetOrThrow(); })) - .def(py::init([](const std::string& type, int64_t index) { - return Device::New(type, index).GetOrThrow(); - }), - py::arg("type"), py::arg("index")) + .def(py::init([](const std::string& type, int64_t device_id) { + return Device::New(type, device_id).GetOrThrow(); + })) .def(py::init([](const Symbol& other_device) { return other_device; })) .def_property_readonly("type", [](const Symbol& d) { return d->type(); }) .def_property_readonly("index", [](const Symbol& d) { return d->device_id(); }) diff --git a/oneflow/core/autograd/gradient_funcs/activation.cpp b/oneflow/core/autograd/gradient_funcs/activation.cpp index 2f388e94cee..03db0d7f49c 100644 --- a/oneflow/core/autograd/gradient_funcs/activation.cpp +++ b/oneflow/core/autograd/gradient_funcs/activation.cpp @@ -152,36 +152,6 @@ class QuickGeLU : public OpExprGradFunction { } }; -struct SquareReLUCaptureState : public AutoGradCaptureState { - bool requires_grad = false; -}; - -class SquareReLU : public OpExprGradFunction { - public: - Maybe Init(const OpExpr& op) override { return Maybe::Ok(); } - - Maybe Capture(SquareReLUCaptureState* ctx, const TensorTuple& inputs, - const TensorTuple& outputs, const AttrMap& attrs) const override { - CHECK_EQ_OR_RETURN(inputs.size(), 1); // NOLINT(maybe-need-error-msg) - CHECK_EQ_OR_RETURN(outputs.size(), 1); // NOLINT(maybe-need-error-msg) - ctx->requires_grad = inputs.at(0)->requires_grad(); - if (!ctx->requires_grad) { return Maybe::Ok(); } - ctx->SaveTensorForBackward(inputs.at(0)); - return Maybe::Ok(); - } - - Maybe Apply(const SquareReLUCaptureState* ctx, const TensorTuple& out_grads, - TensorTuple* in_grads) const override { - CHECK_EQ_OR_RETURN(out_grads.size(), 1); // NOLINT(maybe-need-error-msg) - in_grads->resize(1); - if (ctx->requires_grad) { - const auto& x = ctx->SavedTensors().at(0); - in_grads->at(0) = JUST(functional::SquareReLUGrad(out_grads.at(0), x)); - } - return Maybe::Ok(); - } -}; - class HardSigmoid : public BaseActivation { public: Maybe Apply(const BaseActivationCaptureState* ctx, const TensorTuple& out_grads, @@ -668,7 +638,6 @@ REGISTER_OP_EXPR_GRAD_FUNCTION("softplus", Softplus); REGISTER_OP_EXPR_GRAD_FUNCTION("softshrink", SoftShrink); REGISTER_OP_EXPR_GRAD_FUNCTION("fast_gelu", FastGeLU); REGISTER_OP_EXPR_GRAD_FUNCTION("quick_gelu", QuickGeLU); -REGISTER_OP_EXPR_GRAD_FUNCTION("square_relu", SquareReLU); } // namespace one } // namespace oneflow diff --git a/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h b/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h index d3f20dce80e..c30835decc2 100644 --- a/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h +++ b/oneflow/core/ep/common/primitive/broadcast_elementwise_binary.h @@ -126,8 +126,7 @@ inline bool IsDimsEquals(size_t 
num_src0_dims, const int64_t* src0_dims, size_t OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kTanhBackwardWithDyY) \ OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kThresholdBackwardWithDyX) \ OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kFastGeluBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kQuickGeluBackwardWithDyX) \ - OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kSquareReLUBackwardWithDyX) + OF_PP_MAKE_TUPLE_SEQ(BinaryOp::kQuickGeluBackwardWithDyX) #define BINARY_ACTIVATION_BACKWARD_OP_SEQ \ BINARY_ACTIVATION_BACKWARD_OP_SEQ_0 \ diff --git a/oneflow/core/ep/common/primitive/elementwise_unary.h b/oneflow/core/ep/common/primitive/elementwise_unary.h index ec7651e1047..14fcce26feb 100644 --- a/oneflow/core/ep/common/primitive/elementwise_unary.h +++ b/oneflow/core/ep/common/primitive/elementwise_unary.h @@ -86,8 +86,7 @@ namespace primitive { OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kNotEqualZero) \ OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kNanAssign) \ OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kFastGelu) \ - OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kQuickGelu) \ - OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kSquareReLU) + OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kQuickGelu) #define UNARY_COMPLEX_C2C_OP_SEQ \ OF_PP_MAKE_TUPLE_SEQ(UnaryOp::kConj) \ diff --git a/oneflow/core/ep/cpu/primitive/binary_functor.h b/oneflow/core/ep/cpu/primitive/binary_functor.h index 56422deedf6..d479a7a7409 100644 --- a/oneflow/core/ep/cpu/primitive/binary_functor.h +++ b/oneflow/core/ep/cpu/primitive/binary_functor.h @@ -309,16 +309,6 @@ struct BinaryFunctor(1.702); }; -template -struct BinaryFunctor { - OF_DEVICE_FUNC BinaryFunctor(Scalar attr0, Scalar attr1) {} - - OF_DEVICE_FUNC Dst operator()(Src dy, Src x) const { - return static_cast((x > static_cast(0.0)) ? static_cast(2.0) * x * dy - : static_cast(0.0)); - } -}; - template struct BinaryFunctor { OF_DEVICE_FUNC BinaryFunctor(Scalar attr0, Scalar attr1) {} diff --git a/oneflow/core/ep/cpu/primitive/unary_functor.h b/oneflow/core/ep/cpu/primitive/unary_functor.h index 4745ff7cdc6..5c5a236df07 100644 --- a/oneflow/core/ep/cpu/primitive/unary_functor.h +++ b/oneflow/core/ep/cpu/primitive/unary_functor.h @@ -64,15 +64,6 @@ struct UnaryFunctor { static constexpr Src alpha = static_cast(1.702); }; -template -struct UnaryFunctor { - OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} - - OF_DEVICE_FUNC Dst operator()(Src src) const { - return static_cast((src > static_cast(0.0)) ? src * src : 0); - } -}; - template struct UnaryFunctor { OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} @@ -380,7 +371,6 @@ SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kReciprocalNoNan); SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kNotEqualZero); SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kFastGelu); SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kQuickGelu); -SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kSquareReLU); SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kDigamma); SPECIALIZATION_CPU_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kTrigamma); diff --git a/oneflow/core/ep/cuda/primitive/binary_functor.cuh b/oneflow/core/ep/cuda/primitive/binary_functor.cuh index 29cf11cf6c4..fa360490659 100644 --- a/oneflow/core/ep/cuda/primitive/binary_functor.cuh +++ b/oneflow/core/ep/cuda/primitive/binary_functor.cuh @@ -150,16 +150,6 @@ struct BinaryFunctor(1.702); }; -template -struct BinaryFunctor { - OF_DEVICE_FUNC BinaryFunctor(Scalar attr0, Scalar attr1) {} - - OF_DEVICE_FUNC Dst operator()(Src dy, Src x) const { - return static_cast((x > static_cast(0.0)) ? 
static_cast(2.0) * x * dy - : static_cast(0.0)); - } -}; - template struct BinaryFunctor { OF_DEVICE_FUNC BinaryFunctor(Scalar attr0, Scalar attr1) {} @@ -415,7 +405,6 @@ SPECIALIZATION_PSEUDO_BFLOAT16_BINARY_FUNCTOR(BinaryOp::kTanhBackwardWithDyY); SPECIALIZATION_PSEUDO_BFLOAT16_BINARY_FUNCTOR(BinaryOp::kThresholdBackwardWithDyX); SPECIALIZATION_PSEUDO_BFLOAT16_BINARY_FUNCTOR(BinaryOp::kFastGeluBackwardWithDyX); SPECIALIZATION_PSEUDO_BFLOAT16_BINARY_FUNCTOR(BinaryOp::kQuickGeluBackwardWithDyX); -SPECIALIZATION_PSEUDO_BFLOAT16_BINARY_FUNCTOR(BinaryOp::kSquareReLUBackwardWithDyX); SPECIALIZATION_PSEUDO_BFLOAT16_BINARY_FUNCTOR(BinaryOp::kAcosBackwardWithDyX); SPECIALIZATION_PSEUDO_BFLOAT16_BINARY_FUNCTOR(BinaryOp::kAcoshBackwardWithDyX); @@ -490,7 +479,6 @@ SPECIALIZATION_PSEUDO_HALF_BINARY_FUNCTOR(BinaryOp::kThresholdBackwardWithDyX); SPECIALIZATION_PSEUDO_HALF_BINARY_FUNCTOR(BinaryOp::kTanhBackwardWithDyY); SPECIALIZATION_PSEUDO_HALF_BINARY_FUNCTOR(BinaryOp::kFastGeluBackwardWithDyX); SPECIALIZATION_PSEUDO_HALF_BINARY_FUNCTOR(BinaryOp::kQuickGeluBackwardWithDyX); -SPECIALIZATION_PSEUDO_HALF_BINARY_FUNCTOR(BinaryOp::kSquareReLUBackwardWithDyX); SPECIALIZATION_PSEUDO_HALF_BINARY_FUNCTOR(BinaryOp::kAcosBackwardWithDyX); SPECIALIZATION_PSEUDO_HALF_BINARY_FUNCTOR(BinaryOp::kAcoshBackwardWithDyX); diff --git a/oneflow/core/ep/cuda/primitive/unary_functor.cuh b/oneflow/core/ep/cuda/primitive/unary_functor.cuh index 61441e31288..41dbffb23b0 100644 --- a/oneflow/core/ep/cuda/primitive/unary_functor.cuh +++ b/oneflow/core/ep/cuda/primitive/unary_functor.cuh @@ -70,15 +70,6 @@ struct UnaryFunctor { static constexpr Src alpha = static_cast(1.702); }; -template -struct UnaryFunctor { - OF_DEVICE_FUNC UnaryFunctor(Scalar attr0, Scalar attr1) {} - - OF_DEVICE_FUNC Dst operator()(Src src) const { - return static_cast((src > static_cast(0.0)) ? 
src * src : 0); - } -}; - namespace unary_functor_internal { namespace { @@ -500,7 +491,6 @@ SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kReciprocalNoNan); SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kNotEqualZero); SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kNanAssign); SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kQuickGelu); -SPECIALIZATION_PSEUDO_HALF_UNARY_FUNCTOR(UnaryOp::kSquareReLU); /*********nv_bfloat16_kernel*******/ @@ -568,7 +558,6 @@ SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kNotEqualZero); SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kNanAssign); SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kFastGelu); SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kQuickGelu); -SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kSquareReLU); SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kDigamma); SPECIALIZATION_PSEUDO_BFLOAT16_UNARY_FUNCTOR(UnaryOp::kTrigamma); diff --git a/oneflow/core/ep/include/primitive/binary_op.h b/oneflow/core/ep/include/primitive/binary_op.h index fea46ff4c3f..e505fdae1f4 100644 --- a/oneflow/core/ep/include/primitive/binary_op.h +++ b/oneflow/core/ep/include/primitive/binary_op.h @@ -109,7 +109,6 @@ enum class BinaryOp { kTanBackwardWithDyX, kFastGeluBackwardWithDyX, kQuickGeluBackwardWithDyX, - kSquareReLUBackwardWithDyX, }; } diff --git a/oneflow/core/ep/include/primitive/unary_op.h b/oneflow/core/ep/include/primitive/unary_op.h index 62acd0276a0..cab540adb4a 100644 --- a/oneflow/core/ep/include/primitive/unary_op.h +++ b/oneflow/core/ep/include/primitive/unary_op.h @@ -43,7 +43,6 @@ enum class UnaryOp { kThreshold, kFastGelu, kQuickGelu, - kSquareReLU, // math op kAbs, kAcos, diff --git a/oneflow/core/functional/functional_api.yaml b/oneflow/core/functional/functional_api.yaml index 3323db0e93c..8fd34401c1b 100644 --- a/oneflow/core/functional/functional_api.yaml +++ b/oneflow/core/functional/functional_api.yaml @@ -755,14 +755,6 @@ signature: "Tensor (Tensor dy, Tensor x) => QuickGeluGrad" bind_python: False -- name: "square_relu" - signature: "Tensor (Tensor x) => SquareReLU" - bind_python: True - -- name: "square_relu_grad" - signature: "Tensor (Tensor dy, Tensor x) => SquareReLUGrad" - bind_python: False - - name: "gelu_with_approximate" signature: 'Tensor (Tensor x, String approximate="none") => GeluWithApproximate' bind_python: True diff --git a/oneflow/core/functional/impl/activation_functor.cpp b/oneflow/core/functional/impl/activation_functor.cpp index b0446f79bd8..2aa2d19fb6e 100644 --- a/oneflow/core/functional/impl/activation_functor.cpp +++ b/oneflow/core/functional/impl/activation_functor.cpp @@ -247,21 +247,6 @@ class QuickGeluGradFunctor : public BinaryFunctor { } }; -class SquareReLUFunctor : public UnaryFunctor { - public: - SquareReLUFunctor() { - op_ = CHECK_JUST(one::OpBuilder("square_relu").Input("x").Output("y").Build()); - } -}; - -class SquareReLUGradFunctor : public BinaryFunctor { - public: - SquareReLUGradFunctor() { - op_ = - CHECK_JUST(one::OpBuilder("square_relu_grad").Input("dy").Input("x").Output("dx").Build()); - } -}; - class GluFunctor { public: GluFunctor() {} @@ -794,8 +779,6 @@ ONEFLOW_FUNCTION_LIBRARY(m) { m.add_functor("FastGeluGrad"); m.add_functor("QuickGelu"); m.add_functor("QuickGeluGrad"); - m.add_functor("SquareReLU"); - m.add_functor("SquareReLUGrad"); m.add_functor("Glu"); m.add_functor("HardSigmoid"); m.add_functor("HardSigmoidGrad"); diff --git a/oneflow/core/functional/impl/nn_functor.cpp b/oneflow/core/functional/impl/nn_functor.cpp index 
ef4f2f92070..995cc937bdb 100644 --- a/oneflow/core/functional/impl/nn_functor.cpp +++ b/oneflow/core/functional/impl/nn_functor.cpp @@ -5223,8 +5223,6 @@ class GroupedMatmulFunctor { Maybe operator()(const TensorTuple& xs, const TensorTuple& weights) const { const int64_t input_size = xs.size(); const int64_t weight_size = weights.size(); - CHECK_LT_OR_RETURN(input_size, kMaxInputCount) - << Error::RuntimeError() << "input_size size should not be greater than 128"; CHECK_GE_OR_RETURN(input_size, 1) << Error::RuntimeError() << "The number of xs should be greater equal than 1."; CHECK_EQ_OR_RETURN(weight_size, input_size) diff --git a/oneflow/core/graph/stream_id.cpp b/oneflow/core/graph/stream_id.cpp index 2025ee676e1..cc55718fd4e 100644 --- a/oneflow/core/graph/stream_id.cpp +++ b/oneflow/core/graph/stream_id.cpp @@ -20,9 +20,9 @@ namespace oneflow { // StreamId encoding (bits) // | reserved | node_index | device_type | device_index | stream_index | -// | -- 18 -- | ----- 19 ----- | ---- 5 ---- | ----- 7 ----- | | +// | -- 21 -- | ----- 19 ----- | ---- 5 ---- | ----- 7 ----- | | // | | DeviceId | | -// | | ------------------- 31 --------------------- | ---- 15 ---- | +// | | ------------------- 31 --------------------- | ---- 12 ---- | // | StreamId | // | -------------------------------- 64 ---------------------------------- | diff --git a/oneflow/core/graph/task_id.cpp b/oneflow/core/graph/task_id.cpp index 1cb1bf1db6f..00d3b0b1483 100644 --- a/oneflow/core/graph/task_id.cpp +++ b/oneflow/core/graph/task_id.cpp @@ -20,9 +20,9 @@ namespace oneflow { // TaskId encoding (maybe extended to 128 bits in future) // | rank | device_type | device_index | | -// | ----------- 16 ----------- | ---- 5 ---- | ----- 7 ----- | | +// | ----------- 19 ----------- | ---- 5 ---- | ----- 7 ----- | | // | DeviceId | stream_index | | -// | ------------------------- 31 --------------------------- | ---- 15 ---- | | +// | ------------------------- 31 --------------------------- | ---- 12 ---- | | // | StreamId | task_index | // | -------------------------------- 43 ----------------------------------- | --- 21 --- | // | TaskId | diff --git a/oneflow/ir/include/OneFlow/OneFlowUserOps.td b/oneflow/ir/include/OneFlow/OneFlowUserOps.td index 05f5ea56bc3..a05cb67c91d 100644 --- a/oneflow/ir/include/OneFlow/OneFlowUserOps.td +++ b/oneflow/ir/include/OneFlow/OneFlowUserOps.td @@ -344,20 +344,6 @@ def OneFlow_QuickGeluGradOp : OneFlow_BaseOp<"quick_gelu_grad", [NoMemoryEffect, let has_data_type_infer_fn = 1; } -def OneFlow_SquareReLUGradOp : OneFlow_BaseOp<"square_relu_grad", [NoMemoryEffect, DeclareOpInterfaceMethods]> { - let input = (ins - OneFlow_Tensor:$x, - OneFlow_Tensor:$dy - ); - let output = (outs - OneFlow_Tensor:$dx - ); - let has_logical_tensor_desc_infer_fn = 1; - let has_physical_tensor_desc_infer_fn = 1; - let has_get_sbp_fn = 1; - let has_data_type_infer_fn = 1; -} - def OneFlow_GridSampleOp : OneFlow_BaseOp<"grid_sample", [NoMemoryEffect, DeclareOpInterfaceMethods]> { let input = (ins OneFlow_Tensor:$input, @@ -10428,19 +10414,6 @@ def OneFlow_QuickGeluOp : OneFlow_BaseOp<"quick_gelu", [NoMemoryEffect, DeclareO let has_data_type_infer_fn = 1; } -def OneFlow_SquareReLUOp : OneFlow_BaseOp<"square_relu", [NoMemoryEffect, DeclareOpInterfaceMethods]> { - let input = (ins - OneFlow_Tensor:$x - ); - let output = (outs - OneFlow_Tensor:$y - ); - let has_logical_tensor_desc_infer_fn = 1; - let has_physical_tensor_desc_infer_fn = 1; - let has_get_sbp_fn = 1; - let has_data_type_infer_fn = 1; -} - def 
OneFlow_HardsigmoidOp : OneFlow_BaseOp<"hardsigmoid", [NoMemoryEffect, DeclareOpInterfaceMethods]> { let input = (ins OneFlow_Tensor:$in diff --git a/oneflow/ir/lib/OneFlow/Passes.cpp b/oneflow/ir/lib/OneFlow/Passes.cpp index 844a3331a9e..535c18aeabf 100644 --- a/oneflow/ir/lib/OneFlow/Passes.cpp +++ b/oneflow/ir/lib/OneFlow/Passes.cpp @@ -944,6 +944,7 @@ struct KernelLaunchPattern : public mlir::OpRewritePattern { int name_index = 0; std::vector current_wrap_ops; + op->dump(); for (auto op_it = ops.begin(); op_it != ops.end(); ++op_it) { auto current_op = &(*op_it); if (!IsPackagable(current_op)) { diff --git a/oneflow/user/kernels/activation_kernels.cpp b/oneflow/user/kernels/activation_kernels.cpp index 3094858cff8..baeab19adaa 100644 --- a/oneflow/user/kernels/activation_kernels.cpp +++ b/oneflow/user/kernels/activation_kernels.cpp @@ -282,32 +282,6 @@ REGISTER_USER_KERNEL("quick_gelu_grad") }) .SetIsMatchedHob(BinaryPrimitiveExists(ep::primitive::BinaryOp::kQuickGeluBackwardWithDyX, "dx", "dy")); -REGISTER_USER_KERNEL("square_relu") - .SetCreateFn([]() { - return user_op::NewOpKernel( - "y", "x", [](user_op::KernelComputeContext* ctx) { - const user_op::TensorDesc* src = ctx->TensorDesc4ArgNameAndIndex("x", 0); - const user_op::TensorDesc* dst = ctx->TensorDesc4ArgNameAndIndex("y", 0); - return ep::primitive::NewPrimitive( - ctx->device_type(), ep::primitive::UnaryOp::kSquareReLU, src->data_type(), - dst->data_type()); - }); - }) - .SetIsMatchedHob(UnaryPrimitiveExists(ep::primitive::UnaryOp::kSquareReLU, "y", "x")); - -REGISTER_USER_KERNEL("square_relu_grad") - .SetCreateFn([]() { - return user_op::NewOpKernel( - "dx", "dy", "x", [](user_op::KernelComputeContext* ctx) { - const user_op::TensorDesc* src = ctx->TensorDesc4ArgNameAndIndex("dy", 0); - const user_op::TensorDesc* dst = ctx->TensorDesc4ArgNameAndIndex("dx", 0); - return ep::primitive::NewPrimitive( - ctx->device_type(), ep::primitive::BinaryOp::kSquareReLUBackwardWithDyX, - src->data_type(), dst->data_type(), 1 /*max_num_dims*/); - }); - }) - .SetIsMatchedHob(BinaryPrimitiveExists(ep::primitive::BinaryOp::kSquareReLUBackwardWithDyX, - "dx", "dy")); REGISTER_USER_KERNEL("leaky_relu") .SetCreateFn([]() { diff --git a/oneflow/user/kernels/grouped_matmul_bias.cu b/oneflow/user/kernels/grouped_matmul_bias.cu index 2022fbec012..c23d9c925b8 100644 --- a/oneflow/user/kernels/grouped_matmul_bias.cu +++ b/oneflow/user/kernels/grouped_matmul_bias.cu @@ -190,13 +190,7 @@ class GroupedMatmulBiasKernel final : public user_op::OpKernel, public user_op:: } void* workspace = ctx->Tensor4ArgNameAndIndex("tmp_buffer", 0)->mut_dptr(); for (const auto& group : groups) { - for (size_t i = 0; i < group.second.size(); i += kMaxProblemBatch) { - std::vector> ptrs( - {group.second.begin() + i, - group.second.begin() + i - + std::min(group.second.size() - i, kMaxProblemBatch)}); - ApplyGroup(group.first, ptrs, has_biases, workspace, ctx->stream()); - } + ApplyGroup(group.first, group.second, has_biases, workspace, ctx->stream()); } } bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } diff --git a/oneflow/user/ops/square_relu_op.cpp b/oneflow/user/ops/square_relu_op.cpp deleted file mode 100644 index 7f64c138888..00000000000 --- a/oneflow/user/ops/square_relu_op.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
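
> Reviewer note on the `grouped_matmul_bias.cu` hunk above (hedged; names are illustrative): the removed loop split each group's problem list into slices of at most `kMaxProblemBatch` before calling `ApplyGroup`, whereas after this change the whole group is passed in a single call (the matching `CHECK_LT_OR_RETURN(input_size, kMaxInputCount)` guard in `nn_functor.cpp` is removed in the same spirit). A Python sketch of the chunking behavior being removed:

```python
# Pseudocode mirror of the removed C++ loop:
#   for (size_t i = 0; i < group.second.size(); i += kMaxProblemBatch) ...
# Processes `problems` in slices of at most max_problem_batch.
def apply_in_chunks(problems, max_problem_batch, apply_group):
    for i in range(0, len(problems), max_problem_batch):
        apply_group(problems[i : i + max_problem_batch])

apply_in_chunks(list(range(10)), 4, print)  # -> [0..3], [4..7], [8, 9]
```
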
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ -#include "oneflow/core/framework/framework.h" -#include "oneflow/core/framework/op_generated.h" - -namespace oneflow { - -/*static*/ Maybe SquareReLUOp::InferLogicalTensorDesc(user_op::InferContext* ctx) { - ctx->SetOutputShape("y", 0, ctx->InputShape("x", 0)); - return Maybe::Ok(); -} - -/*static*/ Maybe SquareReLUOp::InferPhysicalTensorDesc(user_op::InferContext* ctx) { - return InferLogicalTensorDesc(ctx); -} - -/*static*/ Maybe SquareReLUOp::InferDataType(user_op::InferContext* ctx) { - ctx->SetOutputDType("y", 0, ctx->InputDType("x", 0)); - return Maybe::Ok(); -} - -/*static*/ Maybe SquareReLUOp::GetSbp(user_op::SbpContext* ctx) { - const user_op::TensorDesc& in_tensor = ctx->LogicalTensorDesc4InputArgNameAndIndex("x", 0); - FOR_RANGE(int64_t, i, 0, in_tensor.shape().NumAxes()) { - ctx->NewBuilder().Split(user_op::OpArg("x", 0), i).Split(user_op::OpArg("y", 0), i).Build(); - } - return Maybe::Ok(); -} - -/*static*/ Maybe SquareReLUGradOp::InferLogicalTensorDesc(user_op::InferContext* ctx) { - const Shape& x_shape = ctx->InputShape("x", 0); - const Shape& dy_shape = ctx->InputShape("dy", 0); - CHECK_OR_RETURN(dy_shape == x_shape) - << "InferTensorDesc failed (" << ctx->op_name() << "). Expected x shape " - << x_shape.ToString() << " to be equal to dy shape " << dy_shape.ToString(); - ctx->SetOutputShape("dx", 0, dy_shape); - return Maybe::Ok(); -} - -/*static*/ Maybe SquareReLUGradOp::InferPhysicalTensorDesc(user_op::InferContext* ctx) { - return InferLogicalTensorDesc(ctx); -} - -/*static*/ Maybe SquareReLUGradOp::InferDataType(user_op::InferContext* ctx) { - CHECK_EQ_OR_RETURN(ctx->InputDType("x", 0), ctx->InputDType("dy", 0)) - << "InferDataType Failed. Expected " << DataType_Name(ctx->InputDType("dy", 0)) - << ", but got " << DataType_Name(ctx->InputDType("x", 0)); - ctx->SetOutputDType("dx", 0, ctx->InputDType("x", 0)); - return Maybe::Ok(); -} - -/*static*/ Maybe SquareReLUGradOp::GetSbp(user_op::SbpContext* ctx) { - const user_op::TensorDesc& x_tensor = ctx->LogicalTensorDesc4InputArgNameAndIndex("x", 0); - FOR_RANGE(int64_t, i, 0, x_tensor.shape().NumAxes()) { - ctx->NewBuilder() - .Split(user_op::OpArg("x", 0), i) - .Split(user_op::OpArg("dy", 0), i) - .Split(user_op::OpArg("dx", 0), i) - .Build(); - } - return Maybe::Ok(); -} - -} // namespace oneflow diff --git a/python/oneflow/__init__.py b/python/oneflow/__init__.py index 6dc7520a410..85faa262056 100644 --- a/python/oneflow/__init__.py +++ b/python/oneflow/__init__.py @@ -130,7 +130,6 @@ def use_deterministic_algorithms(mode, *, warn_only=False): from oneflow._C import quantile from oneflow._C import gelu_with_approximate as gelu from oneflow._C import quick_gelu -from oneflow._C import square_relu from oneflow._C import mish from oneflow._C import repeat from oneflow._C import repeat_interleave diff --git a/python/oneflow/_dynamo/__init__.py b/python/oneflow/_dynamo/__init__.py deleted file mode 100644 index abc1eea891a..00000000000 --- a/python/oneflow/_dynamo/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. 
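
> Reviewer note on the `square_relu` removal (this op definition plus the functors, kernels, gradient function, and Python bindings deleted above): per the removed CPU/CUDA functors, the op computed y = x² when x > 0 (else 0), with gradient dx = 2·x·dy when x > 0. A hedged NumPy sketch of those semantics, not the deleted kernels themselves:

```python
import numpy as np

def square_relu(x):
    # Forward from the removed unary functor: (x > 0) ? x * x : 0
    return np.where(x > 0, x * x, 0.0)

def square_relu_grad(dy, x):
    # Backward from the removed binary functor: (x > 0) ? 2 * x * dy : 0
    return np.where(x > 0, 2.0 * x * dy, 0.0)

x = np.array([-1.0, 0.5, 2.0])
print(square_relu(x))                         # [0.   0.25 4.  ]
print(square_relu_grad(np.ones_like(x), x))   # [0. 1. 4.]
```
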
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import warnings - -# Reference: https://github.com/pytorch/pytorch/blob/v2.0.1/torch/_dynamo/__init__.py -__all__ = [ - "allow_in_graph", -] - - -def allow_in_graph(fn): - """ - """ - if isinstance(fn, (list, tuple)): - return [allow_in_graph(x) for x in fn] - assert callable(fn), "allow_in_graph expects a callable" - warnings.warn( - "The oneflow._dynamo.allow_in_graph interface is just to align the torch._dynamo.allow_in_graph interface and has no practical significance." - ) - return fn diff --git a/python/oneflow/framework/args_tree.py b/python/oneflow/framework/args_tree.py index 50f4e6a7fcb..afd38c9907b 100644 --- a/python/oneflow/framework/args_tree.py +++ b/python/oneflow/framework/args_tree.py @@ -41,15 +41,12 @@ class NamedArg(object): named_input = NamedArg([NamedArg(1), NamedArg({key: NamedArg("value")})]) """ - def __init__( - self, prefix="", name=None, global_index=0, tensor_type=Tensor - ) -> None: + def __init__(self, prefix="", name=None, global_index=0) -> None: self._name = name if name is not None else str(global_index) self._prefix = prefix self._global_index = global_index self._is_value_set = False self._value = None - self._tensor_type = tensor_type def prefix(self): return self._prefix @@ -89,28 +86,21 @@ def __repr__(self): repr_str += "LIST" elif _is_raw_type(self._value, dict) or _is_raw_type(self._value, OrderedDict): repr_str += "DICT" - elif isinstance(self._value, self._tensor_type): + elif isinstance(self._value, Tensor): repr_str += "TENSOR" elif self._value is None: repr_str += "NONE" else: repr_str += "OPAQUE" - - if isinstance(self._value, self._tensor_type): - repr_str += ( - ", value: tensor(" - + str(self._value.shape) - + ", " - + str(self._value.dtype) - + ")" - ) + if isinstance(self._value, Tensor): + repr_str += ", value: " + self._value._meta_repr() elif ( _is_raw_type(self._value, dict) or _is_raw_type(self._value, OrderedDict) or _is_raw_type(self._value, list) or _is_raw_type(self._value, tuple) ): - repr_str += ", value: " + repr(self._value) + pass else: repr_str += ", value: " + repr(self._value) repr_str += ")" @@ -124,7 +114,6 @@ def __init__( gen_name: bool = False, root_prefix: str = "", root_name: str = None, - tensor_type=Tensor, ) -> None: self._io_args = io_args @@ -133,7 +122,6 @@ def __init__( self._root_name = root_name self._named_io_args = None self._next_global_index = 0 - self._tensor_type = tensor_type if self._gen_name: self._named_io_args = self._construct_named_io_args( @@ -190,7 +178,7 @@ def iter_named_nodes(self): yield (named_node.prefix() + "_" + named_node.name(), named_node) def _construct_named_io_args(self, value, prefix: str, name: str) -> NamedArg: - arg = NamedArg(prefix, name, self._next_global_index, self._tensor_type) + arg = NamedArg(prefix, name, self._next_global_index) self._next_global_index += 1 if _is_raw_type(value, list) or _is_raw_type(value, tuple): @@ -231,7 +219,7 @@ def map_tuple_leaf(self, map_function: Callable): stack = [] # Cases handled: tuple(tensor, 
...), such as input args. - if len(self._io_args) > 0 and isinstance(self._io_args[0], self._tensor_type): + if len(self._io_args) > 0 and isinstance(self._io_args[0], Tensor): for i in self._io_args: mapped_value = map_function(i) stack.append(mapped_value) @@ -245,7 +233,7 @@ def map_tuple_leaf(self, map_function: Callable): elif ( len(self._io_args) > 0 and isinstance(self._io_args[0], (tuple, list)) - and all(isinstance(arg, self._tensor_type) for arg in self._io_args[0]) + and all(isinstance(arg, Tensor) for arg in self._io_args[0]) ): for i in self._io_args[0]: mapped_value = map_function(i) @@ -295,9 +283,3 @@ def _execute_mapping(self, value, map_function): mapped_value = map_function(value) return mapped_value - - def __repr__(self): - if self._named_io_args: - return self._named_io_args.__repr__() - else: - return str(self.__class__) diff --git a/python/oneflow/framework/docstr/activation.py b/python/oneflow/framework/docstr/activation.py index 402b0f56c6e..dc1e8f48391 100644 --- a/python/oneflow/framework/docstr/activation.py +++ b/python/oneflow/framework/docstr/activation.py @@ -136,28 +136,6 @@ """, ) -add_docstr( - oneflow._C.square_relu, - r""" - square_relu(x: Tensor) -> Tensor - - Applies the relu^2 activation introduced in https://arxiv.org/abs/2109.08668v2 - - .. math:: - \\text{ReLU}(x) = \\max(0, x) * \\max(0, x) - - Args: - input (oneflow.Tensor): Input Tensor - - Returns: - oneflow.Tensor: A Tensor has same shape as the input. - - See - :class:`~oneflow.nn.SquareReLU` for more details. - - """, -) - add_docstr( oneflow._C.softmax, r""" diff --git a/python/oneflow/nn/__init__.py b/python/oneflow/nn/__init__.py index b4465a82ad0..864a6e9bd8d 100644 --- a/python/oneflow/nn/__init__.py +++ b/python/oneflow/nn/__init__.py @@ -20,7 +20,6 @@ CELU, GELU, QuickGELU, - SquareReLU, GLU, Hardsigmoid, Hardshrink, diff --git a/python/oneflow/nn/functional/__init__.py b/python/oneflow/nn/functional/__init__.py index 8019945c366..21afe67ab6d 100644 --- a/python/oneflow/nn/functional/__init__.py +++ b/python/oneflow/nn/functional/__init__.py @@ -40,7 +40,6 @@ from oneflow._C import max_unpool3d from oneflow._C import cosine_similarity, pairwise_distance from oneflow._C import relu -from oneflow._C import square_relu from oneflow._C import hardtanh from oneflow._C import hardsigmoid from oneflow._C import hardshrink diff --git a/python/oneflow/nn/graph/cache.py b/python/oneflow/nn/graph/cache.py index 059cece917a..7870e224656 100644 --- a/python/oneflow/nn/graph/cache.py +++ b/python/oneflow/nn/graph/cache.py @@ -20,7 +20,7 @@ from oneflow.framework.args_tree import ArgsTree from oneflow.framework.tensor import Tensor -import oneflow +import oneflow as flow class LRUCache(object): @@ -134,28 +134,6 @@ def runtime_state_dict( destination[state_dict["graph_name"]] = state_dict return destination - @staticmethod - def runtime_state_dict_to( - state_dict: Union[ - Dict[str, Union[Dict[str, Tensor], str]], - Dict[str, Dict[str, Union[Dict[str, Tensor], str]]], - ], - device: str, - ) -> Union[ - Dict[str, Union[Dict[str, Tensor], str]], - Dict[str, Dict[str, Union[Dict[str, Tensor], str]]], - ]: - destination = OrderedDict() - destination._metadata = OrderedDict() - for (key, sub_state_dict) in state_dict.items(): - dest_sub_state_dict = oneflow.nn.Graph.runtime_state_dict_to( - sub_state_dict, device - ) - dest_sub_state_dict["cache_order"] = sub_state_dict["cache_order"] - dest_sub_state_dict["cache_key"] = sub_state_dict["cache_key"] - destination[key] = dest_sub_state_dict - return 
destination - def _init_and_get_a_graph_in_cache(self, cache_key): self._base_graph._print( 0, diff --git a/python/oneflow/nn/graph/graph.py b/python/oneflow/nn/graph/graph.py index 8fdba595fe9..2b6aca3627c 100644 --- a/python/oneflow/nn/graph/graph.py +++ b/python/oneflow/nn/graph/graph.py @@ -52,9 +52,6 @@ GraphIR, seq_to_func_return, sys_exc_error_msg, - _rsd_sub_destination_to, - _job_to, - _plan_to, ) from oneflow.framework.args_tree import ArgsTree from oneflow.nn.modules.module import Module @@ -1072,35 +1069,34 @@ def _fill_sub_destination(dest_dict, name_list, tensor_tuple): assert len(tensor_tuple) == len(name_list) for name_idx in range(len(name_list)): tensor_item = tensor_tuple[name_idx] - device_str = ":".join( - (tensor_item.device.type, str(tensor_item.device.index)) - ) - dest_dict[name_list[name_idx]] = (tensor_item, device_str) + dest_dict[name_list[name_idx]] = (tensor_item, tensor_item.device.type) # This is original outputs is needed to build output buffer. tuple_idx = -1 - def gen_index_in_tuple(item): + def gen_index_in_tuple(eager_out): nonlocal tuple_idx - if isinstance(item, Tensor): - tuple_idx += 1 - return "_OFTPI" + str(tuple_idx) # oneflow tuple index - else: - return item + tuple_idx += 1 + return "_OFTPI" + str(tuple_idx) # oneflow tuple index inputs_sub_destination = OrderedDict() _fill_sub_destination( inputs_sub_destination, self._input_op_names, self._inputs_tensor_tuple ) - _eager_inputs_args, _eager_inputs_kwargs = self.__map_io_lite( - gen_index_in_tuple, *self.inputs_original[0], **self.inputs_original[1], + _eager_inputs_args, _eager_inputs_kwargs = self.__map_io( + "input", + gen_index_in_tuple, + *self.inputs_original[0], + **self.inputs_original[1], ) destination["inputs"] = inputs_sub_destination destination["inputs_original"] = (_eager_inputs_args, _eager_inputs_kwargs) tuple_idx = -1 - _eager_outputs, _ = self.__map_io_lite(gen_index_in_tuple, *self._eager_outputs) + _eager_outputs, _ = self.__map_io( + "output", gen_index_in_tuple, *self._eager_outputs + ) destination["outputs_original"] = _eager_outputs assert len(self._outputs_tensor_tuple) == tuple_idx + 1 outputs_sub_destination = OrderedDict() @@ -1150,7 +1146,7 @@ def load_runtime_state_dict( Dict[str, Dict[str, Union[Dict[str, Tensor], str]]], ], *, - warmup_with_run: bool = True, + warmup_with_run: bool = False, ) -> None: if self._run_with_cache == True: return self._dynamic_input_graph_cache.load_runtime_state_dict( @@ -1297,7 +1293,6 @@ def get_tensor_in_tuple(tensor_tuple, map_item): self.__run( *_eager_inputs_args, **_eager_inputs_kwargs ) # pre-run to warm up - oneflow._oneflow_internal.eager.Sync() build_graph_end = time.perf_counter() self.__print( 0, @@ -1309,53 +1304,6 @@ def get_tensor_in_tuple(tensor_tuple, map_item): + "\n", ) - @staticmethod - def runtime_state_dict_to( - state_dict: Union[ - Dict[str, Union[Dict[str, Tensor], str]], - Dict[str, Dict[str, Union[Dict[str, Tensor], str]]], - ], - device: str, - ) -> Union[ - Dict[str, Union[Dict[str, Tensor], str]], - Dict[str, Dict[str, Union[Dict[str, Tensor], str]]], - ]: - if "job_id" not in state_dict: - from oneflow.nn.graph.cache import GraphCache - - return GraphCache.runtime_state_dict_to(state_dict, device) - - dest_device = oneflow.device(device) - assert dest_device.type == "cuda", "device must be cuda." 
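
> Reviewer note on the removed `runtime_state_dict_to` path (here in `cache.py`, and in `graph.py`/`util.py` below): the deleted helper `_rsd_sub_destination_to` rewrote each `(tensor, device_str)` entry by copying the tensor to the destination device. A sketch of that mapping, assuming the same `(tensor, device_str)` value shape as the removed code:

```python
from collections import OrderedDict

import oneflow

def rsd_sub_destination_to(origin_dict, dest_device_str):
    # Mirrors the removed _rsd_sub_destination_to: copy every stored
    # tensor to dest_device_str and record the new device string.
    dest = OrderedDict()
    for name, (tensor, _device_str) in origin_dict.items():
        dest[name] = (
            tensor.to(device=oneflow.device(dest_device_str), copy=True),
            dest_device_str,
        )
    return dest
```
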
- - destination = OrderedDict() - destination._metadata = OrderedDict() - destination["oneflow_version"] = state_dict["oneflow_version"] - destination["graph_name"] = state_dict["graph_name"] - destination["job_id"] = state_dict["job_id"] - destination["inputs"] = _rsd_sub_destination_to(state_dict["inputs"], device) - destination["inputs_original"] = state_dict["inputs_original"] - destination["outputs"] = _rsd_sub_destination_to(state_dict["outputs"], device) - destination["outputs_original"] = state_dict["outputs_original"] - destination["oneflow_with_eager_tensor"] = state_dict[ - "oneflow_with_eager_tensor" - ] - if "states" in state_dict: - destination["states"] = _rsd_sub_destination_to( - state_dict["states"], device - ) - destination["exe_plan"] = _plan_to(state_dict["exe_plan"], dest_device) - if "forward_graph" in state_dict: - forward_graph = deepcopy(state_dict["forward_graph"]) - _job_to(forward_graph, dest_device) - destination["forward_graph"] = forward_graph - if "compile_graph" in state_dict: - compile_graph = deepcopy(state_dict["compile_graph"]) - _job_to(compile_graph, dest_device) - destination["compile_graph"] = compile_graph - destination["id_state"] = state_dict["id_state"] - return destination - def build_graph(self, *args, **kwargs): # Build graph try: @@ -1800,13 +1748,14 @@ def __build_io(self, io_type, build_func, *args, **kwargs): args_repr = [] tensor2op_name = {} - def build_tensor_or_any(tensor, name, repr_str): + def build_tensor_or_none(tensor, name, repr_str): + assert tensor is None or (isinstance(tensor, Tensor)) if isinstance(tensor, Tensor): build_arg = build_func(name, tensor) op_names.append(name) tensor2op_name[build_arg] = name else: - build_arg = tensor + build_arg = None args_repr.append(repr_str) self.__print(0, 1, repr_str) @@ -1822,13 +1771,18 @@ def leaf_arg_fn(arg): arg_repr = self.__io_item_check_and_gen_repr( arg.value(), Tensor, io_type, name ) - build_arg = build_tensor_or_any(arg.value(), name, arg_repr) + build_arg = build_tensor_or_none(arg.value(), name, arg_repr) return build_arg + elif arg.value() is None: + arg_repr = self.__io_item_check_and_gen_repr( + arg.value(), None, io_type, name + ) + build_arg = build_tensor_or_none(arg.value(), name, arg_repr) else: # Opaque + # Error arg_repr = self.__io_item_check_and_gen_repr( arg.value(), None, io_type, name ) - build_arg = build_tensor_or_any(arg.value(), name, arg_repr) out = args_tree.map_leaf(leaf_arg_fn) build_args = out[0] @@ -1838,7 +1792,7 @@ def leaf_arg_fn(arg): def __io_item_check_and_gen_repr(self, item, expect_type, io_type, name): assert io_type in ("input", "output") - if expect_type is None: + if expect_type is None and item is None: repr_str = ( "[WARNING](" + io_type.upper() @@ -1848,7 +1802,6 @@ def __io_item_check_and_gen_repr(self, item, expect_type, io_type, name): + str(type(item)) + ")" ) - self.__print(1, 0, repr_str) return repr_str elif expect_type is not None and isinstance(item, expect_type): if isinstance(item, Tensor): @@ -1878,21 +1831,27 @@ def __io_item_check_and_gen_repr(self, item, expect_type, io_type, name): def __map_io(self, io_type, func, *args, **kwargs): assert io_type in ("input", "output") - def mapping_tensor_or_any(tensor): + def mapping_tensor_or_none(tensor): + assert tensor is None or (isinstance(tensor, Tensor)) if isinstance(tensor, Tensor): mapped_arg = func(tensor) else: - mapped_arg = tensor + mapped_arg = None return mapped_arg def leaf_arg_fn(arg): arg_value = arg.value() - return mapping_tensor_or_any(arg_value) + if 
isinstance(arg_value, Tensor) or arg_value is None: + return mapping_tensor_or_none(arg_value) + else: + self.__io_item_check( + arg_value, None, io_type, arg.prefix() + "_" + arg.name(), + ) # NOTE(lixiang): Reduce the overhead of traversal and parsing of io args. if self._is_simple_tuple_output or self._is_simple_tuple_input: args_tree = ArgsTree(args, False) - out = args_tree.map_tuple_leaf(mapping_tensor_or_any) + out = args_tree.map_tuple_leaf(mapping_tensor_or_none) return out, kwargs args_tree = ArgsTree( diff --git a/python/oneflow/nn/graph/util.py b/python/oneflow/nn/graph/util.py index f60ecac07f3..f2ad36456a8 100644 --- a/python/oneflow/nn/graph/util.py +++ b/python/oneflow/nn/graph/util.py @@ -16,15 +16,12 @@ import sys from string import Template from typing import Callable, Dict, Union, List, Tuple, Optional -from collections import OrderedDict +import google.protobuf as protobuf from google.protobuf import text_format -from google.protobuf.message import Message import oneflow import oneflow.core.job.job_pb2 as job_pb -import oneflow.core.job.plan_pb2 as plan_pb -import oneflow.core.common.device_type_pb2 as device_type import oneflow.core.operator.op_conf_pb2 as op_conf_util from oneflow.framework.tensor import Tensor @@ -270,7 +267,11 @@ def _op_signature( return True, op_str -def operators_repr(ops: Message, graph_ir: GraphIR, show_op_loc: bool,) -> List[str]: +def operators_repr( + ops: protobuf.pyext._message.RepeatedCompositeContainer, + graph_ir: GraphIR, + show_op_loc: bool, +) -> List[str]: r"""Generate operators' string representation of this module """ graph_proto = graph_ir._graph_proto @@ -311,119 +312,3 @@ def seq_to_func_return(seq, need_unpack=False): if need_unpack: return seq[0] return seq - - -def _rsd_sub_destination_to(origin_dict, dest_device_str): - dest_dict = OrderedDict() - for k, v in origin_dict.items(): - tensor_item, device_str = v - dest_dict[k] = ( - tensor_item.to(device=oneflow.device(dest_device_str), copy=True), - dest_device_str, - ) - return dest_dict - - -def _parallel_conf_to(parallel_conf, dest_device): - if parallel_conf.device_tag == "cuda": - assert len(parallel_conf.device_name) == 1 - parallel_conf.device_name[0] = "@0:" + str(dest_device.index) - - -def _mem_case_to(mem_case, dest_device): - if mem_case.device_type == device_type.DeviceType.kCUDA: - mem_case.device_id = dest_device.index - if ( - mem_case.HasField("pinned_device_type") - and mem_case.pinned_device_type == device_type.DeviceType.kCUDA - ): - mem_case.pinned_device_id = dest_device.index - - -def _job_to(job, dest_device): - for pg in job.placement.placement_group: - _parallel_conf_to(pg.parallel_conf, dest_device) - for bpg in job.placement.blob_placement_group: - _parallel_conf_to(bpg.parallel_conf, dest_device) - - -def _modify_bits(original_num, k, j, new_num): - if k > j: - return original_num - mask = ((1 << (j - k + 1)) - 1) << k - cleared_num = original_num & ~mask - modified_num = cleared_num | ((new_num & ((1 << (j - k + 1)) - 1)) << k) - return modified_num - - -def _get_bits(original_num, k, j): - mask = ((1 << (j - k + 1)) - 1) << k - cleared_num = (original_num & mask) >> k - - return cleared_num - - -def _task_id_to(task_id, dest_device): - if _get_bits(task_id, 43, 48) == 2: - new_id = _modify_bits(task_id, 36, 43, dest_device.index) - - return new_id - else: - return task_id - - -def _thrd_id_to(thrd_id, dest_device): - if _get_bits(thrd_id, 22, 27) == 2: - new_id = _modify_bits(thrd_id, 15, 22, dest_device.index) - return new_id - else: - return 
thrd_id - - -def _plan_to(plan_str, dest_device): - plan = plan_pb.Plan() - plan.ParseFromString(plan_str) - for task in plan.task: - task.task_id = _task_id_to(task.task_id, dest_device) - task.thrd_id = _thrd_id_to(task.thrd_id, dest_device) - for node in task.exec_sequence.exec_node: - _parallel_conf_to( - node.kernel_conf.op_attribute.parallel_conf_signature.op_parallel_conf, - dest_device, - ) - for name, regst in task.produced_regst_desc.items(): - regst.producer_task_id = _task_id_to(regst.producer_task_id, dest_device) - for c_task_id_idx in range(len(regst.consumer_task_id)): - regst.consumer_task_id[c_task_id_idx] = _task_id_to( - regst.consumer_task_id[c_task_id_idx], dest_device - ) - _mem_case_to(regst.mem_case, dest_device) - for mem_block in plan.block_chunk_list.mem_block: - _mem_case_to(mem_block.mem_case, dest_device) - mem_block.thrd_id_hint = _thrd_id_to(mem_block.thrd_id_hint, dest_device) - for chunk in plan.block_chunk_list.chunk: - _mem_case_to(chunk.mem_case, dest_device) - - new_ctrl_regst_desc_id2producer_task_id = {} - for ( - regst_desc_id, - producer_task_id, - ) in plan.ctrl_regst_desc_info.ctrl_regst_desc_id2producer_task_id.items(): - new_ctrl_regst_desc_id2producer_task_id[regst_desc_id] = _task_id_to( - producer_task_id, dest_device - ) - for ( - regst_desc_id, - producer_task_id, - ) in new_ctrl_regst_desc_id2producer_task_id.items(): - plan.ctrl_regst_desc_info.ctrl_regst_desc_id2producer_task_id[ - regst_desc_id - ] = producer_task_id - - for job_id, op_attr_tab in plan.job_id2op_attribute_ref_table.items(): - for _, op_attr in op_attr_tab.op_name2op_attribute.items(): - _parallel_conf_to( - op_attr.parallel_conf_signature.op_parallel_conf, dest_device - ) - - return plan.SerializeToString() diff --git a/python/oneflow/nn/modules/activation.py b/python/oneflow/nn/modules/activation.py index ad410bb8fcf..1803958d773 100644 --- a/python/oneflow/nn/modules/activation.py +++ b/python/oneflow/nn/modules/activation.py @@ -407,45 +407,6 @@ def forward(self, x): return flow._C.quick_gelu(x) -class SquareReLU(Module): - """ - SquareReLU() -> Tensor - - Applies the relu^2 activation introduced in https://arxiv.org/abs/2109.08668v2 - - .. math:: - :math:`\\text{SquareReLU}(x) = \\max(0, x) * \\max(0, x)` - - Args: - input (oneflow.Tensor): Input Tensor - - Returns: - oneflow.Tensor: A Tensor has same shape as the input. - - For example: - - .. 
code-block:: python - - >>> import numpy as np - >>> import oneflow as flow - - >>> x = np.array([-0.5, 0, 0.5]).astype(np.float32) - >>> input = flow.Tensor(x) - >>> square_relu = flow.nn.SquareReLU() - - >>> out = square_relu(input) - >>> out - tensor([0.0000, 0.0000, 0.2500], dtype=oneflow.float32) - - """ - - def __init__(self): - super().__init__() - - def forward(self, x): - return flow._C.square_relu(x) - - class Sigmoid(Module): """Applies the element-wise function: diff --git a/python/oneflow/nn/modules/module.py b/python/oneflow/nn/modules/module.py index 512cceb37a6..3bdf4d63ca7 100644 --- a/python/oneflow/nn/modules/module.py +++ b/python/oneflow/nn/modules/module.py @@ -1268,8 +1268,6 @@ def register_forward_hook(self, hook: Callable[..., None]): return handle def _apply(self, fn): - if not hasattr(self, "cpg"): - self.cpg = None if self.cpg is not None: self.cpg = None warnings.warn( diff --git a/python/oneflow/test/expensive/test_graph_multi_graph_v2.py b/python/oneflow/test/expensive/test_graph_multi_graph_v2.py index a4563337630..360ebc8bb90 100644 --- a/python/oneflow/test/expensive/test_graph_multi_graph_v2.py +++ b/python/oneflow/test/expensive/test_graph_multi_graph_v2.py @@ -249,7 +249,7 @@ def build(self, x): @_with_new_session def _test_linear_multi_graph_load( - return_dict, device, with_reshape, state_dict, with_new_input + return_dict, device, with_reshape, state_dict, ): linear = flow.nn.Linear(3, 8, False) linear = linear.to(device) @@ -320,26 +320,25 @@ def build(self, x): test_case1 = np.array_equal(of_lazy_out1.numpy(), of_eager_out1.numpy()) return_dict["load1"] = test_case1 - if with_new_input: - # The following section is for testing the new input shape after completing the load. - input_arr2 = np.array( - [ - [-0.94630778, -0.83378579, -0.87060891], - [2.0289922, -0.28708987, -2.18369248], - [0.08086036, -1.81075924, 1.20752494], - ], - dtype=np.float32, - ) - x2 = flow.tensor(input_arr2, device=device) - of_lazy_out2 = linear_g(x2) - of_eager_out2 = linear_reshape(x2) - test_case2 = np.array_equal(of_lazy_out2.numpy(), of_eager_out2.numpy()) - return_dict["load2"] = test_case2 + # The following section is for testing the new input shape after completing the load. 
+ input_arr2 = np.array( + [ + [-0.94630778, -0.83378579, -0.87060891], + [2.0289922, -0.28708987, -2.18369248], + [0.08086036, -1.81075924, 1.20752494], + ], + dtype=np.float32, + ) + x2 = flow.tensor(input_arr2, device=device) + of_lazy_out2 = linear_g(x2) + of_eager_out2 = linear_reshape(x2) + test_case2 = np.array_equal(of_lazy_out2.numpy(), of_eager_out2.numpy()) + return_dict["load2"] = test_case2 def _graph_save(return_dict, filename, with_eager): state_dict = _test_linear_multi_graph_save( - return_dict, flow.device("cuda:0"), True, with_eager, + return_dict, flow.device("cuda"), True, with_eager, ) print( f"state_dict(with_eager={with_eager}) tensors size ", @@ -353,19 +352,7 @@ def _graph_load(return_dict, filename): state_dict_loaded = flow.load(filename) # load with nn.Graph _test_linear_multi_graph_load( - return_dict, flow.device("cuda"), True, state_dict_loaded, True - ) - print("====> load process done") - - -def _graph_load_to_another_device(return_dict, filename): - state_dict_loaded = flow.load(filename) - new_state_dict = flow.nn.Graph.runtime_state_dict_to( - state_dict_loaded, flow.device("cuda:1") - ) - # load with nn.Graph - _test_linear_multi_graph_load( - return_dict, flow.device("cuda:1"), True, new_state_dict, False + return_dict, flow.device("cuda"), True, state_dict_loaded, ) print("====> load process done") @@ -395,33 +382,6 @@ def _test_linear_multi_graph_save_load_gpu(test_case, with_eager): test_case.assertTrue(check_value, key + " failed.") -def _test_load_to_another_device(test_case, with_eager): - # A graph runtime state dict - with tempfile.NamedTemporaryFile() as f: - # Save a graph - manager = multiprocessing.Manager() - return_dict = manager.dict() - save_p = multiprocessing.get_context("spawn").Process( - target=_graph_save, args=(return_dict, f.name, with_eager), - ) - save_p.start() - save_p.join() - print(save_p) - - # Resume a graph from a graph runtime state dict - load_p = multiprocessing.get_context("spawn").Process( - target=_graph_load_to_another_device, args=(return_dict, f.name) - ) - load_p.start() - load_p.join() - print(load_p) - - # test_case can't be passed into sub process, so we check with return_dict. - # Reference: https://stackoverflow.com/questions/52225003/writing-to-multiple-files-using-multiprocessing-error-typeerror-cannot-seria - for (key, check_value) in return_dict.items(): - test_case.assertTrue(check_value, key + " failed.") - - @unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") @flow.unittest.skip_unless_1n1d() class TestLinearMultiGraph(oneflow.unittest.TestCase): @@ -437,9 +397,6 @@ def test_linear_multi_graph_save_load_gpu_with_share(test_case): def test_linear_multi_graph_save_load_gpu_with_share_without_eager(test_case): _test_linear_multi_graph_save_load_gpu(test_case, False) - def test_load_to_another_device(test_case): - _test_load_to_another_device(test_case, False) - if __name__ == "__main__": unittest.main() diff --git a/python/oneflow/test/modules/test_square_relu.py b/python/oneflow/test/modules/test_square_relu.py deleted file mode 100644 index 799ab272316..00000000000 --- a/python/oneflow/test/modules/test_square_relu.py +++ /dev/null @@ -1,67 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import unittest -from collections import OrderedDict - -import numpy as np -from oneflow.test_utils.test_util import GenArgList - -import oneflow as flow -import oneflow.unittest -import torch - - -class SquareReLUActivation(torch.nn.Module): - """ - Applies the relu^2 activation introduced in https://arxiv.org/abs/2109.08668v2 - """ - - def forward(self, input): - relu_applied = torch.nn.functional.relu(input) - squared = torch.square(relu_applied) - return squared - - -def _test_square_relu(test_case, device): - torch_square_relu = SquareReLUActivation() - x = np.random.randn(2, 4, 3) - torch_x = torch.tensor(x, requires_grad=True, device=torch.device(device)) - oneflow_x = flow.tensor(x, requires_grad=True, device=flow.device(device)) - torch_y = torch_square_relu(torch_x) - oneflow_y = flow._C.square_relu(oneflow_x) - test_case.assertTrue(np.allclose(torch_y.detach().cpu().numpy(), oneflow_y.numpy())) - torch_y_sum = torch_y.sum() - torch_y_sum.backward() - oneflow_y_sum = oneflow_y.sum() - oneflow_y_sum.backward() - test_case.assertTrue( - np.allclose(torch_x.grad.cpu().numpy(), oneflow_x.grad.numpy()) - ) - - -@flow.unittest.skip_unless_1n1d() -class TestModule(flow.unittest.TestCase): - def test_square_relu(test_case): - arg_dict = OrderedDict() - arg_dict["test_fun"] = [_test_square_relu] - arg_dict["device"] = ["cpu", "cuda"] - for arg in GenArgList(arg_dict): - arg[0](test_case, *arg[1:]) - - -if __name__ == "__main__": - unittest.main()
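Note on the runtime_state_dict changes in python/oneflow/nn/graph/graph.py above: gen_index_in_tuple now assumes every leaf passed to it is a Tensor (the pass-through branch for other values is gone, matching the stricter __map_io), and each tensor leaf of inputs_original/outputs_original is recorded as an "_OFTPI<n>" marker so the nested argument structure can be stored without the tensors and later re-bound to the flat tensor tuple. A minimal standalone sketch of that round trip, independent of oneflow's ArgsTree; FakeTensor and both helper names are illustrative, not oneflow API:

    def to_placeholders(struct, state, is_tensor):
        # Depth-first over tuples/lists/dicts; every tensor leaf becomes
        # "_OFTPI<n>" (oneflow tuple index), numbered in visit order so the
        # markers line up with the flattened tensor tuple.
        if is_tensor(struct):
            state["idx"] += 1
            return "_OFTPI" + str(state["idx"])
        if isinstance(struct, (list, tuple)):
            return type(struct)(to_placeholders(v, state, is_tensor) for v in struct)
        if isinstance(struct, dict):
            return {k: to_placeholders(v, state, is_tensor) for k, v in struct.items()}
        return struct

    def from_placeholders(struct, tensor_tuple):
        # Inverse walk: markers are re-bound to entries of the flat tuple.
        if isinstance(struct, str) and struct.startswith("_OFTPI"):
            return tensor_tuple[int(struct[len("_OFTPI"):])]
        if isinstance(struct, (list, tuple)):
            return type(struct)(from_placeholders(v, tensor_tuple) for v in struct)
        if isinstance(struct, dict):
            return {k: from_placeholders(v, tensor_tuple) for k, v in struct.items()}
        return struct

    class FakeTensor:  # stand-in for oneflow.Tensor in this sketch
        pass

    a, b = FakeTensor(), FakeTensor()
    original = {"x": a, "rest": [b, 42]}
    state = {"idx": -1}
    stored = to_placeholders(original, state, lambda v: isinstance(v, FakeTensor))
    assert stored == {"x": "_OFTPI0", "rest": ["_OFTPI1", 42]}
    restored = from_placeholders(stored, (a, b))
    assert restored["x"] is a and restored["rest"][0] is b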
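Note on the load_runtime_state_dict signature change: warmup_with_run now defaults to False, so callers that relied on the automatic pre-run on load must opt in explicitly. A sketch of the save/load round trip; the multi-graph tests in this patch run save and load in separate spawned processes, and the same-process round trip below is shortened for illustration only and may not be supported by every build:

    import oneflow as flow

    class LinearGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.m = flow.nn.Linear(3, 8, False)

        def build(self, x):
            return self.m(x)

    g1 = LinearGraph()
    _ = g1(flow.randn(3, 3))        # first call compiles the graph
    sd = g1.runtime_state_dict()    # capture the compiled runtime

    g2 = LinearGraph()
    # Opt back in to the pre-run warm-up now that the default is False.
    g2.load_runtime_state_dict(sd, warmup_with_run=True)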
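Note on the bit helpers removed from python/oneflow/nn/graph/util.py: _get_bits read a field of bits k..j (inclusive) out of a packed id, and _modify_bits rewrote that field in place, which is how _task_id_to/_thrd_id_to retargeted a serialized plan's task and thread ids at another CUDA device index. A self-contained round-trip check of the two helpers exactly as they appeared above; the field positions come from the removed _task_id_to, while the sample id itself is made up:

    def _get_bits(num, k, j):
        # value of bits k..j (inclusive, counting from the LSB) of num
        mask = ((1 << (j - k + 1)) - 1) << k
        return (num & mask) >> k

    def _modify_bits(num, k, j, new):
        # num with bits k..j replaced by the low bits of new
        if k > j:
            return num
        mask = ((1 << (j - k + 1)) - 1) << k
        return (num & ~mask) | ((new & ((1 << (j - k + 1)) - 1)) << k)

    # Made-up packed id: field value 2 in bits 43..48 (the check used by
    # _task_id_to), device index 7 in bits 36..43, arbitrary low bits.
    packed = (2 << 43) | (7 << 36) | 12345
    moved = _modify_bits(packed, 36, 43, 3)   # retarget to device index 3
    assert _get_bits(moved, 36, 43) == 3
    assert _get_bits(moved, 43, 48) == 2      # type field unchanged
    assert moved & ((1 << 36) - 1) == 12345   # low bits unchanged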
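Note on the removed flow.nn.SquareReLU module and its test: the same forward value is expressible with stock ops. A short sketch, assuming (per the removed docstring) that the dedicated kernel computed max(0, x) * max(0, x); backward behavior should match as well, though it is not exercised here:

    import numpy as np
    import oneflow as flow

    def square_relu(x):
        # relu(x) squared, using only ops that remain after this patch
        y = flow.relu(x)
        return y * y

    x = flow.tensor(np.array([-0.5, 0.0, 0.5], dtype=np.float32))
    print(square_relu(x))  # expect tensor([0.0000, 0.0000, 0.2500], dtype=oneflow.float32)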
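Note on the test pattern kept in test_graph_multi_graph_v2.py: a unittest TestCase cannot be pickled into a spawned subprocess, so each subprocess writes pass/fail flags into a manager dict that the parent asserts on afterwards. The skeleton of that pattern, with an illustrative worker in place of the graph save/load steps:

    import multiprocessing

    def _worker(return_dict):
        # record results instead of asserting inside the subprocess
        return_dict["case1"] = (1 + 1 == 2)

    if __name__ == "__main__":
        manager = multiprocessing.Manager()
        return_dict = manager.dict()
        p = multiprocessing.get_context("spawn").Process(
            target=_worker, args=(return_dict,)
        )
        p.start()
        p.join()
        for key, ok in return_dict.items():
            assert ok, key + " failed."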