Skip to content

Commit 9ac7b67

Browse files
authored
refactor: introduce shared tokenizer abstraction and split implementations (#1423)
1 parent ee5bf95 commit 9ac7b67

26 files changed

Lines changed: 1332 additions & 1439 deletions

CMakeLists.txt

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -156,8 +156,10 @@ file(GLOB SD_LIB_SOURCES
156 156
"src/*.h"
157 157
"src/*.cpp"
158 158
"src/*.hpp"
159-
"src/vocab/*.h"
160-
"src/vocab/*.cpp"
159+
"src/tokenizers/*.h"
160+
"src/tokenizers/*.cpp"
161+
"src/tokenizers/vocab/*.h"
162+
"src/tokenizers/vocab/*.cpp"
161 163
)
162 164

163 165
find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH)
@@ -250,7 +252,7 @@ endif()
250 252
add_subdirectory(thirdparty)
251 253

252 254
target_link_libraries(${SD_LIB} PUBLIC ggml zip)
253-
target_include_directories(${SD_LIB} PUBLIC . include)
255+
target_include_directories(${SD_LIB} PUBLIC . src include)
254 256
target_include_directories(${SD_LIB} PUBLIC . thirdparty)
255 257
target_compile_features(${SD_LIB} PUBLIC c_std_11 cxx_std_17)
256 258

format-code.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
for f in src/*.cpp src/*.h src/*.hpp src/vocab/*.h src/vocab/*.cpp \
1+
for f in src/*.cpp src/*.h src/*.hpp src/tokenizers/*.h src/tokenizers/*.cpp src/tokenizers/vocab/*.h src/tokenizers/vocab/*.cpp \
2 2
examples/cli/*.cpp examples/cli/*.h examples/server/*.cpp \
3 3
examples/common/*.hpp examples/common/*.h examples/common/*.cpp; do
4 4
[[ "$f" == vocab* ]] && continue

0 commit comments

Comments
 (0)