
Verify chatglm3 6b #1119

Open
wants to merge 64 commits into master

Commits (64)
f51bf94
THUDM/chatglm3-6b_added_in_nightly_models
Aniruddha521 Oct 29, 2024
064d8ad
checked
Aniruddha521 Oct 30, 2024
6122d99
Merge branch 'openvinotoolkit:master' into verify_chatglm3-6b
Aniruddha521 Oct 30, 2024
a3a01ee
extended SUPPORTED_MODELS
Aniruddha521 Oct 31, 2024
bdfb5b4
Merge branch 'openvinotoolkit:master' into verify_chatglm3-6b
Aniruddha521 Oct 31, 2024
53711c5
updated -causal_lm_cpp.yml-removed-cpp-prompt_lookup_decoding_lm-ubun…
Aniruddha521 Nov 1, 2024
4532c96
updated_supported_model.md_as_asked
Aniruddha521 Nov 1, 2024
13c1d97
Merge branch 'verify_chatglm3-6b' of github.com:Aniruddha521/openvino…
Aniruddha521 Nov 1, 2024
e2c73b8
Update SUPPORTED_MODELS.md
Aniruddha521 Nov 1, 2024
c190798
updated-causal-lm
Aniruddha521 Nov 1, 2024
2e2b293
Update causal_lm_cpp.yml
Aniruddha521 Nov 4, 2024
c4619a8
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Nov 5, 2024
9292757
updating branch
Aniruddha521 Nov 5, 2024
4028323
Updated cpp-prompt_lookup_decoding_lm-ubuntu and cpp-greedy_causal_lm…
Aniruddha521 Nov 5, 2024
b70302c
Update causal_lm_cpp.yml
Aniruddha521 Nov 5, 2024
f251985
Update causal_lm_cpp.yml
Aniruddha521 Nov 5, 2024
08d7701
Merge branch 'verify_chatglm3-6b' of github.com:Aniruddha521/openvino…
Aniruddha521 Nov 5, 2024
87db456
updated causal_lm_cpp.yml
Aniruddha521 Nov 5, 2024
2e12370
Updated
Aniruddha521 Nov 6, 2024
53c9a80
Merge branch 'master' into verify_chatglm3-6b
Wovchena Nov 7, 2024
65b7891
Update causal_lm_cpp.yml
Aniruddha521 Nov 7, 2024
f99f6f6
Merge remote-tracking branch 'upstream/master' into verify_chatglm3-6b
Aniruddha521 Nov 7, 2024
996fe65
Merge branch 'verify_chatglm3-6b' of github.com:Aniruddha521/openvino…
Aniruddha521 Nov 7, 2024
83ab7c4
Sync_and_Merged
Aniruddha521 Nov 11, 2024
30133b9
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Nov 11, 2024
93d8ca0
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Nov 12, 2024
ccd9ba9
tokenizers
Aniruddha521 Nov 12, 2024
d302833
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Nov 12, 2024
cca410a
slight modification
Aniruddha521 Nov 12, 2024
6a8c40b
Merge branch 'verify_chatglm3-6b' of github.com:Aniruddha521/openvino…
Aniruddha521 Nov 12, 2024
bdd716c
Update causal_lm_cpp.yml
Aniruddha521 Nov 5, 2024
35fd401
checking
Aniruddha521 Nov 12, 2024
55f493b
usual check
Aniruddha521 Nov 12, 2024
c2c2726
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Nov 12, 2024
c3e0ec0
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Nov 13, 2024
cafa42a
Update causal_lm_cpp.yml
Aniruddha521 Nov 13, 2024
506a895
usual check
Aniruddha521 Nov 12, 2024
50c4aec
Merge branch 'verify_chatglm3-6b' of github.com:Aniruddha521/openvino…
Aniruddha521 Nov 13, 2024
0d6e58e
minor changes
Aniruddha521 Nov 13, 2024
0f39e8c
few modification
Aniruddha521 Nov 22, 2024
1541e17
Merge branch 'openvinotoolkit:master' into verify_chatglm3-6b
Aniruddha521 Nov 23, 2024
a58887f
Update causal_lm_cpp.yml
Aniruddha521 Nov 23, 2024
05033a1
Merge branch 'openvinotoolkit:master' into verify_chatglm3-6b
Aniruddha521 Nov 25, 2024
3abadc6
Update causal_lm_cpp.yml
Aniruddha521 Nov 26, 2024
1341022
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Nov 26, 2024
b5435a4
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Nov 28, 2024
ebcf18e
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Nov 29, 2024
2dde652
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Nov 29, 2024
e02036c
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Nov 29, 2024
9fa72e6
modification in github workflow(casual_lm_cpp)
Aniruddha521 Nov 29, 2024
96d4538
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Dec 3, 2024
d55b6f7
Suggested changes
Aniruddha521 Dec 4, 2024
a0565e6
Merge branch 'verify_chatglm3-6b' of github.com:Aniruddha521/openvino…
Aniruddha521 Dec 4, 2024
5ed5a63
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Dec 4, 2024
e7c77b0
Merge branch 'openvinotoolkit:master' into verify_chatglm3-6b
Aniruddha521 Dec 6, 2024
bad9759
tokenizers
Aniruddha521 Dec 6, 2024
0ba2718
cpp-prompt_lookup_decoding_lm-ubuntu
Aniruddha521 Dec 6, 2024
a7ee29e
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Dec 13, 2024
c62bc60
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Dec 19, 2024
63f694a
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Dec 20, 2024
446e153
Merge branch 'openvinotoolkit:master' into verify_chatglm3-6b
Aniruddha521 Dec 30, 2024
13d57e2
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Dec 30, 2024
fee5629
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Dec 31, 2024
f80515d
Merge branch 'master' into verify_chatglm3-6b
Aniruddha521 Dec 31, 2024
60 changes: 59 additions & 1 deletion .github/workflows/causal_lm_cpp.yml
@@ -274,6 +274,41 @@ jobs:
&& call .\ov\setupvars.bat
&& python samples\python\greedy_causal_lm\lora.py .\TinyLlama\TinyLlama-1.1B-intermediate-step-1431k-3T\ adapter_model.safetensors "How to create a table with two columns, one of them has type float, another one has type int?"

cpp-greedy_causal_lm-Chatglm3-6b:
runs-on: ubuntu-20.04-16-cores
defaults:
run:
shell: bash
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- uses: actions/setup-python@v4
with:
python-version: 3.11
- name: Install OpenVINO
run: |
mkdir ./ov/
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
sudo ./ov/install_dependencies/install_openvino_dependencies.sh
- name: Build app
run: |
source ./ov/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release -j
- name: Download and convert model
run: |
source ./ov/setupvars.sh
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model THUDM/chatglm3-6b chatglm3-6b --task text-generation-with-past
- run: >
. ./ov/setupvars.sh
&& timeout 2m ./build/samples/cpp/greedy_causal_lm/greedy_causal_lm ./chatglm3-6b/ 69 | diff <(timeout 2m python samples/python/greedy_causal_lm/greedy_causal_lm.py ./chatglm3-6b/ 69) -
env:
PYTHONPATH: "./build"
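The job above relies on greedy decoding being deterministic: the C++ and Python samples run the same model on the same prompt, and their stdout is compared with `diff`. A minimal sketch of that equivalence check (the output strings are hypothetical placeholders, not real model output):

```python
# Sketch of the equivalence check the CI step performs with `diff`:
# greedy decoding is deterministic, so the C++ and Python pipelines
# must produce byte-identical completions for the same model and prompt.
# The strings below are hypothetical placeholders for real sample output.

def outputs_match(cpp_output: str, python_output: str) -> bool:
    """Return True when the two greedy decoders agree exactly."""
    return cpp_output == python_output

cpp_out = "Sixty-nine is an odd composite number."
py_out = "Sixty-nine is an odd composite number."
print(outputs_match(cpp_out, py_out))  # True
```

Any divergence signals a bug, for example a tokenizer or logits mismatch between the two pipelines, rather than ordinary sampling noise.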


cpp-greedy_causal_lm-Qwen-7B-Chat:
runs-on: ubuntu-20.04-16-cores
defaults:
@@ -463,6 +498,7 @@ jobs:
env:
PYTHONPATH: "./build/:$PYTHONPATH"
LD_LIBRARY_PATH: "./build/openvino_genai/:$LD_LIBRARY_PATH"

cpp-prompt_lookup_decoding_lm-ubuntu:
runs-on: ubuntu-20.04-16-cores
defaults:
@@ -492,6 +528,7 @@ jobs:
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen-7B-Chat Qwen-7B-Chat --task text-generation-with-past
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model THUDM/chatglm3-6b chatglm3-6b --task text-generation-with-past
- name: run and compare
run: |
source ./ov/setupvars.sh
@@ -534,6 +571,27 @@ jobs:
assert predicted_greedy == predicted_prompt_lookup
"
echo "Prompt lookup" passed
- name: run and compare
run: |
source ./ov/setupvars.sh

echo 'Code:```python
def add(a, b):
return a + b
```
Question: Can you please add 2 and 3
A:' > ./prompt.txt

./build/samples/cpp/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm ./chatglm3-6b/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
./build/samples/cpp/greedy_causal_lm/greedy_causal_lm ./chatglm3-6b/ "$(<prompt.txt)" > predictions_greedy.txt
python -c "
with open('predictions_greedy.txt', 'r') as f:
predicted_greedy = f.readline()
with open('predictions_prompt_lookup.txt', 'r') as f:
predicted_prompt_lookup = f.readline()
assert predicted_greedy == predicted_prompt_lookup
"
echo "Prompt lookup" passed
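The step above feeds the samples a prompt containing code, which is exactly the case prompt lookup decoding exploits: it searches the prompt for an n-gram matching the most recently generated tokens and proposes the tokens that followed the match as draft candidates, which the model verifies in a single pass. Because greedy verification accepts a draft token only when it equals the greedy choice, the final text matches plain greedy decoding, which is what the comparison above asserts. A minimal sketch of the candidate-lookup step (token IDs and parameter values are illustrative):

```python
def find_candidate_tokens(tokens, ngram_size=3, num_pred=5):
    """Search earlier tokens for the last `ngram_size` tokens and
    return up to `num_pred` tokens that followed the match as drafts."""
    if len(tokens) < ngram_size:
        return []
    tail = tokens[-ngram_size:]
    # Scan left to right; the range excludes the tail matching itself.
    for start in range(len(tokens) - ngram_size):
        if tokens[start:start + ngram_size] == tail:
            follow = tokens[start + ngram_size:start + ngram_size + num_pred]
            if follow:
                return follow
    return []

# The sequence repeats the n-gram [10, 11, 12], so the tokens that
# followed its earlier occurrence are proposed as draft candidates.
prompt = [10, 11, 12, 13, 20, 21, 10, 11, 12]
print(find_candidate_tokens(prompt))  # [13, 20, 21, 10, 11]
```

When no n-gram matches, the lookup returns nothing and generation falls back to ordinary one-token-at-a-time decoding.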

cpp-Phi-1_5:
runs-on: ubuntu-20.04-16-cores
@@ -970,7 +1028,7 @@ jobs:

Overall_Status:
name: ci/gha_overall_status_causal_lm
needs: [cpp-multinomial-greedy_causal_lm-ubuntu, cpp-beam_search_causal_lm-ubuntu, cpp-greedy_causal_lm-windows,
needs: [cpp-multinomial-greedy_causal_lm-ubuntu, cpp-beam_search_causal_lm-ubuntu, cpp-greedy_causal_lm-windows, cpp-greedy_causal_lm-Chatglm3-6b,
cpp-greedy_causal_lm-Qwen-7B-Chat, cpp-beam_search_causal_lm-Qwen1_5-7B-Chat, cpp-beam_search_causal_lm-Phi-2,
cpp-beam_search_causal_lm-notus-7b-v1, cpp-speculative_decoding_lm-ubuntu, cpp-prompt_lookup_decoding_lm-ubuntu,
cpp-Phi-1_5, cpp-greedy_causal_lm-redpajama-3b-chat, cpp-chat_sample-ubuntu, cpp-continuous-batching-ubuntu,
4 changes: 3 additions & 1 deletion src/docs/SUPPORTED_MODELS.md
@@ -155,7 +155,9 @@ The pipeline can work with other similar topologies produced by `optimum-intel`
4. `position_ids` (optional) encodes a position of currently generating token in the sequence and a single `logits` output.

> [!NOTE]
> Models should belong to the same family and have the same tokenizers.
> * Models should belong to the same family and have the same tokenizers.
> * `optimum-cli` requires the `--task text-generation-with-past` argument for the `THUDM/chatglm3-6b` model.
> * Beam search is not supported for the `THUDM/chatglm3-6b` model.

## Text 2 image models

1 change: 1 addition & 0 deletions tests/python_tests/ov_genai_test_utils.py
@@ -25,6 +25,7 @@ def get_models_list():
"microsoft/phi-1_5",
"microsoft/phi-2",
"THUDM/chatglm2-6b",
"THUDM/chatglm3-6b", # no beam_search
Wovchena (Collaborator) · Dec 3, 2024

Does every python test pass with THUDM/chatglm3-6b? If not, please, mark the failing tests to be skipped. Skips must happen only for that particular model.

Aniruddha521 (Author)

For the python tests (including beam search), the output shows "Killed", which may be due to resource constraints.

Wovchena (Collaborator)

I refer to python tests, not python samples.

"Qwen/Qwen2-0.5B-Instruct",
"Qwen/Qwen-7B-Chat",
"Qwen/Qwen1.5-7B-Chat",
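Per the review exchange above, any python tests that fail with `THUDM/chatglm3-6b` should be skipped for that model only, keeping coverage for every other model in the list. A minimal sketch of how such a per-model skip could look (the predicate and the pytest wiring shown in comments are illustrative, not the repository's actual test code):

```python
# Models known not to support a given feature (beam search, per the PR note).
NO_BEAM_SEARCH = {"THUDM/chatglm3-6b"}

def should_skip_beam_search(model_id: str) -> bool:
    """Skip beam-search tests only for this particular model."""
    return model_id in NO_BEAM_SEARCH

# Inside a parametrized pytest test, this predicate would gate pytest.skip():
#
#   @pytest.mark.parametrize("model_id", get_models_list())
#   def test_beam_search(model_id):
#       if should_skip_beam_search(model_id):
#           pytest.skip(f"beam search is not supported for {model_id}")
#       ...  # test body runs for all other models

print(should_skip_beam_search("THUDM/chatglm3-6b"))  # True
print(should_skip_beam_search("THUDM/chatglm2-6b"))  # False
```

Scoping the skip to an explicit model set keeps the failure visible in the test code itself instead of silently dropping the model from the shared list.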