Skip to content

Commit fff956c

Browse files
authored
Use pytorch-labs/tokenizers and remove tokenizer/ (#1401)
* Use pytorch-labs/tokenizers and remove tokenizer/ Summary: Use our shiny new repo https://github.com/pytorch-labs/tokenizers. Test Plan: Rely on CI jobs Reviewers: Subscribers: Tasks: Tags: * Fix pull.yml Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags:
1 parent 29428ef commit fff956c

File tree

18 files changed

+244
-1547
lines changed

18 files changed

+244
-1547
lines changed

.github/workflows/pull.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -942,7 +942,7 @@ jobs:
942942
path: |
943943
./et-build
944944
./torchchat/utils/scripts
945-
key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('**/install_et.sh') }}
945+
key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('**/install_et.sh', '**/build_native.sh') }}
946946
- if: ${{ steps.install-et.outputs.cache-hit != 'true' }}
947947
continue-on-error: true
948948
run: |
@@ -1053,7 +1053,7 @@ jobs:
10531053
10541054
# Pull submodules (re2, abseil) for Tiktoken
10551055
git submodule sync
1056-
git submodule update --init
1056+
git submodule update --init --recursive
10571057
./runner/build_android.sh
10581058
echo "Tests complete."
10591059

.gitmodules

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,3 @@
1-
[submodule "tokenizer/third-party/abseil-cpp"]
2-
path = tokenizer/third-party/abseil-cpp
3-
url = https://github.com/abseil/abseil-cpp.git
4-
[submodule "tokenizer/third-party/re2"]
5-
path = tokenizer/third-party/re2
6-
url = https://github.com/google/re2.git
7-
[submodule "tokenizer/third-party/sentencepiece"]
8-
path = tokenizer/third-party/sentencepiece
9-
url = https://github.com/google/sentencepiece.git
1+
[submodule "runner/third-party/tokenizers"]
2+
path = runner/third-party/tokenizers
3+
url = https://github.com/pytorch-labs/tokenizers

CMakeLists.txt

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,21 @@ ELSE()
77
ENDIF()
88

99
project(Torchchat)
10+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes")
1011

1112
# include tokenizer
12-
add_subdirectory(tokenizer)
13+
add_subdirectory(runner/third-party/tokenizers)
1314

1415
# include et_run executable
1516
include(runner/et.cmake)
1617
if(TARGET et_run)
17-
target_link_libraries(et_run PUBLIC tokenizer microkernels-prod)
18+
target_link_libraries(et_run PUBLIC tokenizers microkernels-prod)
19+
target_include_directories(et_run PUBLIC runner/third-party/tokenizers/include)
1820
endif()
1921

2022
# include aoti_run executable
2123
include(runner/aoti.cmake)
2224
if(TARGET aoti_run)
23-
target_link_libraries(aoti_run tokenizer)
25+
target_link_libraries(aoti_run tokenizers)
26+
target_include_directories(aoti_run PUBLIC runner/third-party/tokenizers/include)
2427
endif()

0 commit comments

Comments
 (0)