From 7a176837e965bd1dfd353c751cb4892bb0d15052 Mon Sep 17 00:00:00 2001 From: Anthony Drendel Date: Wed, 1 Apr 2026 23:17:13 +0200 Subject: [PATCH 1/7] Add Qwen3.5 4B --- Package.swift | 3 +- Sources/SHLLM/LLM.swift | 63 +-- Tests/SHLLMTests/Models/Qwen3_5-4BTests.swift | 429 ++++++++++++++++++ bin/download.sh | 1 + 4 files changed, 467 insertions(+), 29 deletions(-) create mode 100644 Tests/SHLLMTests/Models/Qwen3_5-4BTests.swift diff --git a/Package.swift b/Package.swift index 13b5646..ff10866 100644 --- a/Package.swift +++ b/Package.swift @@ -44,7 +44,7 @@ let package = Package( ), ], // resources: [ -// .copy("Resources/DeepSeek-R1-Distill-Qwen-7B-4bit"), + // .copy("Resources/DeepSeek-R1-Distill-Qwen-7B-4bit"), // .copy("Resources/gemma-2-2b-it-4bit"), // .copy("Resources/gemma-3-12b-it-qat-3bit"), // .copy("Resources/gemma-3-12b-it-qat-4bit"), @@ -80,6 +80,7 @@ let package = Package( // .copy("Resources/Qwen3-VL-4B-Instruct-4bit"), // .copy("Resources/Qwen3-VL-4B-Thinking-4bit"), // .copy("Resources/Qwen3.5-2B-6bit"), +// .copy("Resources/Qwen3.5-4B-MLX-4bit"), // .copy("Resources/Qwen3.5-9B-4bit"), // .copy("Resources/Qwen3.5-27B-4bit"), // .copy("Resources/Qwen3.5-35B-A3B-4bit"), diff --git a/Sources/SHLLM/LLM.swift b/Sources/SHLLM/LLM.swift index 2b14128..65b81fb 100644 --- a/Sources/SHLLM/LLM.swift +++ b/Sources/SHLLM/LLM.swift @@ -1186,34 +1186,6 @@ extension LLM where Model == Qwen3MoEModel { } } -// MARK: - Smol - -extension LLM where Model == LlamaModel { - public static func smolLM( - directory: URL, - input: UserInput, - tools: [any ToolProtocol] = [], - maxInputTokenCount: Int? = nil, - maxOutputTokenCount: Int? = nil - ) throws -> LLM { - try SHLLM.assertSupportedDevice - return .init( - directory: directory, - input: input, - tools: tools, - maxInputTokenCount: maxInputTokenCount, - maxOutputTokenCount: maxOutputTokenCount - ) - } - - static var smolLM: URL { - get throws { - let dir = "SmolLM-135M-Instruct-4bit" - return try Bundle.shllm.directory(named: dir) - } - } -} - // MARK: - Qwen3 VL extension LLM where Model == Qwen3VL { @@ -1346,6 +1318,13 @@ extension LLM where Model == Qwen35 { } } + static var qwen3_5__4B: URL { + get throws { + let dir = "Qwen3.5-4B-MLX-4bit" + return try Bundle.shllm.directory(named: dir) + } + } + static var qwen3_5__9B: URL { get throws { let dir = "Qwen3.5-9B-4bit" @@ -1409,3 +1388,31 @@ extension LLM where Model == Qwen35MoE { } } } + +// MARK: - Smol + +extension LLM where Model == LlamaModel { + public static func smolLM( + directory: URL, + input: UserInput, + tools: [any ToolProtocol] = [], + maxInputTokenCount: Int? = nil, + maxOutputTokenCount: Int? = nil + ) throws -> LLM { + try SHLLM.assertSupportedDevice + return .init( + directory: directory, + input: input, + tools: tools, + maxInputTokenCount: maxInputTokenCount, + maxOutputTokenCount: maxOutputTokenCount + ) + } + + static var smolLM: URL { + get throws { + let dir = "SmolLM-135M-Instruct-4bit" + return try Bundle.shllm.directory(named: dir) + } + } +} diff --git a/Tests/SHLLMTests/Models/Qwen3_5-4BTests.swift b/Tests/SHLLMTests/Models/Qwen3_5-4BTests.swift new file mode 100644 index 0000000..fed0a8e --- /dev/null +++ b/Tests/SHLLMTests/Models/Qwen3_5-4BTests.swift @@ -0,0 +1,429 @@ +import Foundation +import MLXLMCommon +import MLXVLM +@testable import SHLLM +import Testing + +@Suite(.serialized) +struct Qwen3_5_4BTests { + @Test + func canStreamResult() async throws { + let input: UserInput = .init(messages: [ + ["role": "system", "content": "You are a helpful assistant."], + ["role": "user", "content": "What is the meaning of life?"], + ]) + + guard let llm = try qwen3_5__4B(input) else { return } + + var reasoning = "" + var result = "" + for try await reply in llm { + switch reply { + case let .reasoning(text): + reasoning.append(text) + case let .text(text): + result.append(text) + case .toolCall: + Issue.record() + } + } + + Swift.print("\n\(reasoning)\n") + #expect(!reasoning.isEmpty) + + Swift.print(result) + #expect(!result.isEmpty) + } + + @Test + func canStreamTextResult() async throws { + let input: UserInput = .init(messages: [ + ["role": "system", "content": "You are a helpful assistant."], + ["role": "user", "content": "What is the meaning of life?"], + ]) + + guard let llm = try qwen3_5__4B(input) else { return } + + var result = "" + for try await reply in llm.text { + result.append(reply) + } + + Swift.print(result) + #expect(!result.isEmpty) + } + + @Test + func canAwaitResult() async throws { + let input: UserInput = .init(messages: [ + ["role": "system", "content": "You are a helpful assistant."], + ["role": "user", "content": "What is the meaning of life?"], + ]) + + guard let llm = try qwen3_5__4B(input) else { return } + + let (_reasoning, _text, toolCalls) = try await llm.result + + let reasoning = try #require(_reasoning) + Swift.print("\n\(reasoning)\n") + #expect(!reasoning.isEmpty) + + let text = try #require(_text) + Swift.print(text) + #expect(!text.isEmpty) + + #expect(toolCalls == nil) + } + + @Test + func canAwaitTextResult() async throws { + let input: UserInput = .init(messages: [ + ["role": "system", "content": "You are a helpful assistant."], + ["role": "user", "content": "What is the meaning of life?"], + ]) + + guard let llm = try qwen3_5__4B(input) else { return } + + let result = try await llm.text.result + Swift.print(result) + #expect(!result.isEmpty) + } + + @Test + func canStreamResultWithoutThinking() async throws { + var input: UserInput = .init(messages: [ + ["role": "system", "content": "You are a helpful assistant."], + ["role": "user", "content": "What is the meaning of life?"], + ]) + input.additionalContext = ["enable_thinking": false] + + guard let llm = try qwen3_5__4B(input) else { return } + + let (reasoning, _text, _) = try await llm.result + #expect(reasoning == nil) + + let text = try #require(_text) + Swift.print(text) + #expect(!text.isEmpty) + } + + @Test + func canFetchTheWeather() async throws { + let input = UserInput(chat: [ + .system( + "You are a weather assistant who must use the get_current_weather tool to fetch weather data for any location the user asks about." + ), + .user("What is the weather in Paris, France?"), + ]) + + guard let llm = try qwen3_5__4B( + input, + tools: [weatherTool] + ) else { return } + + var reasoning = "" + var reply = "" + var toolCallCount = 0 + var weatherLocationFound = false + + for try await response in llm { + switch response { + case let .reasoning(text): + reasoning.append(text) + case let .text(text): + reply.append(text) + case let .toolCall(toolCall): + toolCallCount += 1 + #expect(toolCall.function.name == "get_current_weather") + + if case let .string(location) = toolCall.function.arguments["location"] { + weatherLocationFound = location.lowercased().contains("paris") + } + } + } + + #expect(!reasoning.isEmpty) + #expect(reply.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty) + #expect(toolCallCount >= 1) + #expect(weatherLocationFound) + } + + @Test + func canChooseBetweenDifferentTools() async throws { + let input = UserInput(chat: [ + .system( + "You are a helpful assistant that can provide weather, stock prices, and news." + ), + .user("Get the latest news about Apple, sorted by popularity."), + ]) + + guard let llm = try qwen3_5__4B( + input, + tools: [weatherTool, stockTool, newsTool] + ) else { return } + + var reasoning = "" + var reply = "" + var toolCallCount = 0 + var newsQueryFound = false + var newsSortByFound = false + + for try await response in llm { + switch response { + case let .reasoning(text): + reasoning.append(text) + case let .text(text): + reply.append(text) + case let .toolCall(toolCall): + toolCallCount += 1 + #expect(toolCall.function.name == "get_latest_news") + + if case let .string(query) = toolCall.function.arguments["query"] { + newsQueryFound = query.lowercased().contains("apple") + } + if case let .string(sortBy) = toolCall.function.arguments["sortBy"] { + newsSortByFound = sortBy.lowercased().contains("popularity") + } + } + } + + #expect(!reasoning.isEmpty) + #expect(reply.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty) + #expect(toolCallCount >= 1) + #expect(newsQueryFound) + #expect(newsSortByFound) + } + + @Test + func canUseStockToolAndRespond() async throws { + let chat: [Chat.Message] = [ + .system( + "You are a helpful assistant that can provide stock prices. When asked for a stock price, you must use the get_stock_price tool." + ), + .user("What is the price of AAPL?"), + ] + + var input = UserInput(chat: chat) + + guard let llm1 = try qwen3_5__4B( + input, + tools: [stockTool] + ) else { return } + + let (reasoning1, text1, toolCallsOpt1) = try await llm1.result + #expect(reasoning1 != nil) + #expect(text1 == nil) + let toolCall1 = try #require(toolCallsOpt1?.first) + + #expect(toolCall1.function.name == "get_stock_price") + #expect(toolCall1.function.arguments["symbol"] == .string("AAPL")) + + input.appendAssistantToolCall(toolCall1) + input.appendToolResult(["price": 123.45]) + guard let llm2 = try qwen3_5__4B( + input, + tools: [stockTool] + ) else { return } + + let (reasoning2, text2, toolCallsOpt2) = try await llm2.result + Swift.print(text2 ?? "") + #expect(reasoning2 != nil) + #expect(text2?.isEmpty == false) + #expect(text2?.contains(oneOf: ["aapl"]) == true) + #expect(text2?.contains("123.45") == true) + #expect(toolCallsOpt2 == nil) + } + + @Test + func canCompleteMultiToolWorkflowAndEmail() async throws { + let chat: [Chat.Message] = [ + .system(""" + You are a helpful assistant that must complete tasks by calling tools \ + in sequence. When asked to find information on the web and email it, \ + you must: + + 1) use web_search to find a relevant page + 2) use fetch_web_page to retrieve the page content + 3) use find_email_in_contacts to get the recipient's email + 4) use send_email to send the email with the requested information. + """ + ), + .user( + "Find the keynote date from the ACME Conference website and email it to Alex Example." + ), + ] + + var input = UserInput(chat: chat) + guard let llm = try qwen3_5__4B(input, tools: [ + webSearchTool, fetchPageTool, findEmailTool, sendEmailTool, + ]) else { return } + + let (_, _, toolCallsOutput1) = try await llm.result + let toolCall1 = try #require(toolCallsOutput1?.first) + #expect(toolCall1.function.name == "web_search") + + input.appendAssistantToolCall(toolCall1) + input.appendToolResult([ + "results": [[ + "title": "ACME Conference 2025 Keynote", + "url": "https://acme.test/conf", + ]], + ]) + + guard let llm2 = try qwen3_5__4B(input, tools: [ + webSearchTool, fetchPageTool, findEmailTool, sendEmailTool, + ]) else { return } + let (_, _, toolCallsOutput2) = try await llm2.result + let toolCall2 = try #require(toolCallsOutput2?.first) + #expect(toolCall2.function.name == "fetch_web_page") + + input.appendAssistantToolCall(toolCall2) + input.appendToolResult([ + "content": "Welcome to ACME Conf! Keynote date: November 5, 2025.", + ]) + + guard let llm3 = try qwen3_5__4B(input, tools: [ + webSearchTool, fetchPageTool, findEmailTool, sendEmailTool, + ]) else { return } + let (_, _, toolCallsOutput3) = try await llm3.result + let toolCall3 = try #require(toolCallsOutput3?.first) + #expect(toolCall3.function.name == "find_email_in_contacts") + + input.appendAssistantToolCall(toolCall3) + input.appendToolResult([ + "email": "alex@example.com", + ]) + + guard let llm4 = try qwen3_5__4B(input, tools: [ + webSearchTool, fetchPageTool, findEmailTool, sendEmailTool, + ]) else { return } + let (reasoning, text, toolCalls4) = try await llm4.result + + guard let toolCall4 = toolCalls4?.first else { + Issue.record(""" + Did not call send_email: reasoning=\(String(describing: reasoning)), \ + text=\(String(describing: text)) + """) + return + } + + #expect(toolCall4.function.name == "send_email") + let toArg = try #require(toolCall4.function.arguments["to"]) + let subjectArg = try #require(toolCall4.function.arguments["subject"]) + let bodyArg = try #require(toolCall4.function.arguments["body"]) + #expect((toArg.anyValue as? String) == "alex@example.com") + #expect((subjectArg.anyValue as? String)?.isEmpty == false) + #expect((bodyArg.anyValue as? String)?.isEmpty == false) + + input.appendAssistantToolCall(toolCall4) + input.appendToolResult(["status": "sent"]) + + guard let llm5 = try qwen3_5__4B(input, tools: [ + webSearchTool, fetchPageTool, findEmailTool, sendEmailTool, + ]) else { return } + + let response = try await llm5.text.result + Swift.print(response) + #expect(!response.isEmpty) + #expect(response.contains(oneOf: ["sent", "emailed"])) + #expect(response.lowercased().contains("alex")) + } + + @Test + @MainActor + func canExtractTextFromImageData() async throws { + let data = try authenticationFactors + guard let llm = try qwen3_5__4B(image: data) else { return } + + var response = "" + for try await token in llm.text { + response += token + } + + Swift.print(response) + let strings = [ + "authentication", + "Something you forgot", + "Something you left in the taxi", + "Something that can be chopped off", + ] + #expect(response.contains(oneOf: strings)) + } + + @Test + @MainActor + func canExtractTextFromImageURL() async throws { + let url = try authenticationFactorsURL + guard let llm = try qwen3_5__4B(image: url) else { return } + + var response = "" + for try await token in llm.text { + response += token + } + + Swift.print(response) + let expected = [ + "authentication", + "Something you forgot", + "Something you left in the taxi", + "Something that can be chopped off", + ] + #expect(response.contains(oneOf: expected)) + } +} + +private func qwen3_5__4B( + _ input: UserInput, + tools: [any ToolProtocol] = [] +) throws -> LLM? { + try loadModel( + directory: LLM.qwen3_5__4B, + input: input, + tools: tools, + responseParser: LLM.qwen3_5Parser(for: input) + ) +} + +private func qwen3_5__4B( + image: Data +) throws -> LLM? { + let input = imageInput(image) + return try loadModel( + directory: LLM.qwen3_5__4B, + input: input, + responseParser: LLM.qwen3_5Parser(for: input) + ) +} + +private func qwen3_5__4B( + image: URL +) throws -> LLM? { + let input = imageInput(image) + return try loadModel( + directory: LLM.qwen3_5__4B, + input: input, + responseParser: LLM.qwen3_5Parser(for: input) + ) +} + +private var authenticationFactorsURL: URL { + get throws { + guard let url = Bundle.module.url( + forResource: "3-authentication-factors", + withExtension: "png" + ) else { + throw NSError( + domain: NSURLErrorDomain, + code: NSURLErrorFileDoesNotExist, + userInfo: nil + ) + } + return url + } +} + +private var authenticationFactors: Data { + get throws { + try Data(contentsOf: authenticationFactorsURL) + } +} diff --git a/bin/download.sh b/bin/download.sh index 9a40076..076251c 100755 --- a/bin/download.sh +++ b/bin/download.sh @@ -45,6 +45,7 @@ ids=( "Qwen3-VL-4B-Instruct-4bit" "Qwen3-VL-4B-Thinking-4bit" "Qwen3.5-2B-6bit" + "Qwen3.5-4B-MLX-4bit" "Qwen3.5-27B-4bit" "Qwen3.5-35B-A3B-4bit" "Qwen3.5-9B-4bit" From df4d56d90c25d0f3521278be4e084e2c2d052218 Mon Sep 17 00:00:00 2001 From: Anthony Drendel Date: Sat, 4 Apr 2026 13:08:51 +0200 Subject: [PATCH 2/7] Update mlx-swift-lm and use DePasqualeOrg/swift-tokenizers --- Package.resolved | 219 ++---------------- Package.swift | 16 +- Sources/SHLLM/LLM.swift | 1 - Sources/SHLLM/ModelCache.swift | 4 +- Sources/SHLLM/SHLLM.swift | 2 +- Sources/SHLLM/TokenizersLoader.swift | 56 +++++ .../SHLLM/TruncatingUserInputProcessor.swift | 3 +- .../TruncatingUserInputProcessorTests.swift | 96 +------- 8 files changed, 91 insertions(+), 306 deletions(-) create mode 100644 Sources/SHLLM/TokenizersLoader.swift diff --git a/Package.resolved b/Package.resolved index a533262..5cfba05 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,31 +1,13 @@ { - "originHash" : "872b6229e6bd91a68cee931a98a03f360346ba4753b0d988ba6ffaf7edad82bd", + "originHash" : "a396165c1f87035d1edacbeb5ab75ce114b510c28ecdb4f59826b74f189bf9e0", "pins" : [ - { - "identity" : "async-http-client", - "kind" : "remoteSourceControl", - "location" : "https://github.com/swift-server/async-http-client", - "state" : { - "revision" : "2fc4652fb4689eb24af10e55cabaa61d8ba774fd", - "version" : "1.32.0" - } - }, - { - "identity" : "eventsource", - "kind" : "remoteSourceControl", - "location" : "https://github.com/mattt/EventSource.git", - "state" : { - "revision" : "a3a85a85214caf642abaa96ae664e4c772a59f6e", - "version" : "1.4.1" - } - }, { "identity" : "mlx-swift", "kind" : "remoteSourceControl", "location" : "https://github.com/ml-explore/mlx-swift", "state" : { - "revision" : "6ba4827fb82c97d012eec9ab4b2de21f85c3b33d", - "version" : "0.30.6" + "revision" : "61b9e011e09a62b489f6bd647958f1555bdf2896", + "version" : "0.31.3" } }, { @@ -33,26 +15,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/shareup/mlx-swift-lm", "state" : { - "revision" : "056832eff7ce48efde44236404703df462839884", - "version" : "0.0.11" - } - }, - { - "identity" : "swift-algorithms", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-algorithms.git", - "state" : { - "revision" : "87e50f483c54e6efd60e885f7f5aa946cee68023", - "version" : "1.2.1" - } - }, - { - "identity" : "swift-asn1", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-asn1.git", - "state" : { - "revision" : "810496cf121e525d660cd0ea89a758740476b85f", - "version" : "1.5.1" + "revision" : "d1b14783c93902b74c1211f480ece8f776f4c29c", + "version" : "0.0.12" } }, { @@ -64,24 +28,6 @@ "version" : "1.1.3" } }, - { - "identity" : "swift-atomics", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-atomics.git", - "state" : { - "revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7", - "version" : "1.3.0" - } - }, - { - "identity" : "swift-certificates", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-certificates.git", - "state" : { - "revision" : "24ccdeeeed4dfaae7955fcac9dbf5489ed4f1a25", - "version" : "1.18.0" - } - }, { "identity" : "swift-collections", "kind" : "remoteSourceControl", @@ -91,121 +37,13 @@ "version" : "1.4.0" } }, - { - "identity" : "swift-configuration", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-configuration.git", - "state" : { - "revision" : "be76c4ad929eb6c4bcaf3351799f2adf9e6848a9", - "version" : "1.2.0" - } - }, - { - "identity" : "swift-crypto", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-crypto.git", - "state" : { - "revision" : "6f70fa9eab24c1fd982af18c281c4525d05e3095", - "version" : "4.2.0" - } - }, - { - "identity" : "swift-distributed-tracing", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-distributed-tracing.git", - "state" : { - "revision" : "dc4030184203ffafbb2ec614352487235d747fe0", - "version" : "1.4.1" - } - }, - { - "identity" : "swift-http-structured-headers", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-http-structured-headers.git", - "state" : { - "revision" : "76d7627bd88b47bf5a0f8497dd244885960dde0b", - "version" : "1.6.0" - } - }, - { - "identity" : "swift-http-types", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-http-types.git", - "state" : { - "revision" : "45eb0224913ea070ec4fba17291b9e7ecf4749ca", - "version" : "1.5.1" - } - }, - { - "identity" : "swift-huggingface", - "kind" : "remoteSourceControl", - "location" : "https://github.com/huggingface/swift-huggingface.git", - "state" : { - "revision" : "de01c0ab8fd537bbd8216cea7f774275178501a2", - "version" : "0.8.1" - } - }, { "identity" : "swift-jinja", "kind" : "remoteSourceControl", "location" : "https://github.com/huggingface/swift-jinja.git", "state" : { - "revision" : "f731f03bf746481d4fda07f817c3774390c4d5b9", - "version" : "2.3.2" - } - }, - { - "identity" : "swift-log", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-log.git", - "state" : { - "revision" : "bbd81b6725ae874c69e9b8c8804d462356b55523", - "version" : "1.10.1" - } - }, - { - "identity" : "swift-nio", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-nio.git", - "state" : { - "revision" : "b31565862a8f39866af50bc6676160d8dda7de35", - "version" : "2.96.0" - } - }, - { - "identity" : "swift-nio-extras", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-nio-extras.git", - "state" : { - "revision" : "3df009d563dc9f21a5c85b33d8c2e34d2e4f8c3b", - "version" : "1.32.1" - } - }, - { - "identity" : "swift-nio-http2", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-nio-http2.git", - "state" : { - "revision" : "b6571f3db40799df5a7fc0e92c399aa71c883edd", - "version" : "1.40.0" - } - }, - { - "identity" : "swift-nio-ssl", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-nio-ssl.git", - "state" : { - "revision" : "173cc69a058623525a58ae6710e2f5727c663793", - "version" : "2.36.0" - } - }, - { - "identity" : "swift-nio-transport-services", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-nio-transport-services.git", - "state" : { - "revision" : "60c3e187154421171721c1a38e800b390680fb5d", - "version" : "1.26.0" + "revision" : "0aeefadec459ce8e11a333769950fb86183aca43", + "version" : "2.3.5" } }, { @@ -218,48 +56,21 @@ } }, { - "identity" : "swift-service-context", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-service-context.git", - "state" : { - "revision" : "d0997351b0c7779017f88e7a93bc30a1878d7f29", - "version" : "1.3.0" - } - }, - { - "identity" : "swift-service-lifecycle", - "kind" : "remoteSourceControl", - "location" : "https://github.com/swift-server/swift-service-lifecycle", - "state" : { - "revision" : "89888196dd79c61c50bca9a103d8114f32e1e598", - "version" : "2.10.1" - } - }, - { - "identity" : "swift-system", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-system", - "state" : { - "revision" : "7c6ad0fc39d0763e0b699210e4124afd5041c5df", - "version" : "1.6.4" - } - }, - { - "identity" : "swift-transformers", + "identity" : "swift-syntax", "kind" : "remoteSourceControl", - "location" : "https://github.com/shareup/swift-transformers", + "location" : "https://github.com/swiftlang/swift-syntax.git", "state" : { - "revision" : "531b8b0f94fcd69e0963e7d4f73788cd29471c2d", - "version" : "0.0.1" + "revision" : "0687f71944021d616d34d922343dcef086855920", + "version" : "600.0.1" } }, { - "identity" : "swift-xet", + "identity" : "swift-tokenizers", "kind" : "remoteSourceControl", - "location" : "https://github.com/mattt/swift-xet.git", + "location" : "https://github.com/DePasqualeOrg/swift-tokenizers.git", "state" : { - "revision" : "341bfd4172f6a57119bfd49bafa11cf5d21fab75", - "version" : "0.2.3" + "revision" : "d500ac13c5a9a337b8318d914debfa4ab7fea8cf", + "version" : "0.2.1" } }, { diff --git a/Package.swift b/Package.swift index ff10866..b4f25b3 100644 --- a/Package.swift +++ b/Package.swift @@ -17,11 +17,11 @@ let package = Package( ), .package( url: "https://github.com/shareup/mlx-swift-lm", - from: "0.0.11" + from: "0.0.12" ), .package( - url: "https://github.com/shareup/swift-transformers", - from: "0.0.1" + url: "https://github.com/DePasqualeOrg/swift-tokenizers", + from: "0.2.1" ), .package( url: "https://github.com/shareup/synchronized.git", @@ -36,15 +36,11 @@ let package = Package( .product(name: "MLXLLM", package: "mlx-swift-lm"), .product(name: "MLXLMCommon", package: "mlx-swift-lm"), .product(name: "MLXVLM", package: "mlx-swift-lm"), + .product(name: "Tokenizers", package: "swift-tokenizers"), .product(name: "Synchronized", package: "synchronized"), - .product( - name: "Transformers", - package: "swift-transformers", - moduleAliases: ["Models": "TransformersModels"] - ), ], // resources: [ - // .copy("Resources/DeepSeek-R1-Distill-Qwen-7B-4bit"), +// .copy("Resources/DeepSeek-R1-Distill-Qwen-7B-4bit"), // .copy("Resources/gemma-2-2b-it-4bit"), // .copy("Resources/gemma-3-12b-it-qat-3bit"), // .copy("Resources/gemma-3-12b-it-qat-4bit"), @@ -83,7 +79,7 @@ let package = Package( // .copy("Resources/Qwen3.5-4B-MLX-4bit"), // .copy("Resources/Qwen3.5-9B-4bit"), // .copy("Resources/Qwen3.5-27B-4bit"), -// .copy("Resources/Qwen3.5-35B-A3B-4bit"), +// .copy("Resources/Qwen3.5-35B-A3B-4bit"), // ], linkerSettings: [ .linkedFramework("CoreGraphics", .when(platforms: [.macOS])), diff --git a/Sources/SHLLM/LLM.swift b/Sources/SHLLM/LLM.swift index 65b81fb..d6c1fcb 100644 --- a/Sources/SHLLM/LLM.swift +++ b/Sources/SHLLM/LLM.swift @@ -8,7 +8,6 @@ import MLXLMCommon import MLXNN import MLXVLM import os.log -import Tokenizers public enum Response { case reasoning(String) diff --git a/Sources/SHLLM/ModelCache.swift b/Sources/SHLLM/ModelCache.swift index 2dabaa6..549dfea 100644 --- a/Sources/SHLLM/ModelCache.swift +++ b/Sources/SHLLM/ModelCache.swift @@ -46,7 +46,9 @@ func loadModelContext( let start = Date() try SHLLM.assertSupportedDevice - let baseContext = try await loadModel(directory: directory) + let baseContext = try await loadModel( + from: directory, using: TokenizerLoader() + ) let config = customConfiguration?(baseContext.configuration) ?? baseContext.configuration diff --git a/Sources/SHLLM/SHLLM.swift b/Sources/SHLLM/SHLLM.swift index cf0104f..85dd827 100644 --- a/Sources/SHLLM/SHLLM.swift +++ b/Sources/SHLLM/SHLLM.swift @@ -84,7 +84,7 @@ public enum SHLLM { @_exported import struct MLXLMCommon.ToolParameter @_exported import enum MLXLMCommon.ToolParameterType @_exported import protocol MLXLMCommon.ToolProtocol -@_exported import typealias Tokenizers.ToolSpec +@_exported import typealias MLXLMCommon.ToolSpec extension Chat.Message: @retroactive @unchecked Sendable {} diff --git a/Sources/SHLLM/TokenizersLoader.swift b/Sources/SHLLM/TokenizersLoader.swift new file mode 100644 index 0000000..ce4ebe8 --- /dev/null +++ b/Sources/SHLLM/TokenizersLoader.swift @@ -0,0 +1,56 @@ +import Foundation +import MLXLMCommon +import Tokenizers + +struct TokenizerLoader: MLXLMCommon.TokenizerLoader { + func load(from directory: URL) async throws -> any MLXLMCommon.Tokenizer { + let upstream = try await AutoTokenizer.from(directory: directory) + return _Tokenizer(upstream) + } +} + +private struct _Tokenizer: MLXLMCommon.Tokenizer { + private let upstream: any Tokenizers.Tokenizer + + init(_ upstream: any Tokenizers.Tokenizer) { + self.upstream = upstream + } + + func encode(text: String, addSpecialTokens: Bool) -> [Int] { + upstream.encode(text: text, addSpecialTokens: addSpecialTokens) + } + + func decode(tokenIds: [Int], skipSpecialTokens: Bool) -> String { + upstream.decode(tokenIds: tokenIds, skipSpecialTokens: skipSpecialTokens) + } + + func convertTokenToId(_ token: String) -> Int? { + upstream.convertTokenToId(token) + } + + func convertIdToToken(_ id: Int) -> String? { + upstream.convertIdToToken(id) + } + + var bosToken: String? { upstream.bosToken } + var eosToken: String? { upstream.eosToken } + var unknownToken: String? { upstream.unknownToken } + + func applyChatTemplate( + messages: [[String: any Sendable]], + tools: [[String: any Sendable]]?, + additionalContext: [String: any Sendable]? + ) throws -> [Int] { + do { + return try upstream.applyChatTemplate( + messages: messages, + tools: tools, + additionalContext: additionalContext + ) + } catch let error as Tokenizers.TokenizerError + where error == .missingChatTemplate + { + throw MLXLMCommon.TokenizerError.missingChatTemplate + } + } +} diff --git a/Sources/SHLLM/TruncatingUserInputProcessor.swift b/Sources/SHLLM/TruncatingUserInputProcessor.swift index dab90fa..d4633cc 100644 --- a/Sources/SHLLM/TruncatingUserInputProcessor.swift +++ b/Sources/SHLLM/TruncatingUserInputProcessor.swift @@ -1,7 +1,6 @@ import Foundation import MLX import MLXLMCommon -import Tokenizers struct TruncatingUserInputProcessor: UserInputProcessor { private let baseProcessor: UserInputProcessor @@ -109,7 +108,7 @@ struct TruncatingUserInputProcessor: UserInputProcessor { let secondHalf = tokens.suffix(Int(half.rounded(.up))) assert(firstHalf.count + secondHalf.count <= maxTokenCount) let truncatedTokens = Array(firstHalf + secondHalf) - return tokenizer.decode(tokens: truncatedTokens) + return tokenizer.decode(tokenIds: truncatedTokens) } private func recentMessages( diff --git a/Tests/SHLLMTests/TruncatingUserInputProcessorTests.swift b/Tests/SHLLMTests/TruncatingUserInputProcessorTests.swift index 1c96b8b..00cfde6 100644 --- a/Tests/SHLLMTests/TruncatingUserInputProcessorTests.swift +++ b/Tests/SHLLMTests/TruncatingUserInputProcessorTests.swift @@ -3,7 +3,6 @@ import MLX import MLXLMCommon @testable import SHLLM import Testing -import Tokenizers @Suite(.serialized) struct TruncatingUserInputProcessorTests { @@ -487,13 +486,8 @@ struct TruncatingUserInputProcessorTests { } } -private final class NaiveTokenizer: Tokenizer { - func tokenize(text: String) -> [String] { - text.components(separatedBy: .whitespacesAndNewlines) - .filter { !$0.isEmpty } - } - - func encode(text: String) -> [Int] { +private final class NaiveTokenizer: MLXLMCommon.Tokenizer { + func encode(text: String, addSpecialTokens _: Bool) -> [Int] { text.components(separatedBy: .whitespacesAndNewlines) .filter { !$0.isEmpty } .enumerated() @@ -505,16 +499,8 @@ private final class NaiveTokenizer: Tokenizer { } } - func encode(text: String, addSpecialTokens _: Bool) -> [Int] { - encode(text: text) - } - - func decode(tokens: [Int]) -> String { - decode(tokens: tokens, skipSpecialTokens: false) - } - - func decode(tokens: [Int], skipSpecialTokens _: Bool) -> String { - tokens + func decode(tokenIds: [Int], skipSpecialTokens _: Bool) -> String { + tokenIds .map(String.init) .joined(separator: " ") } @@ -527,84 +513,20 @@ private final class NaiveTokenizer: Tokenizer { String(id) } - func callAsFunction(_ text: String, addSpecialTokens: Bool) -> [Int] { - encode(text: text, addSpecialTokens: addSpecialTokens) - } - - func convertTokensToIds(_ tokens: [String]) -> [Int?] { - tokens.map { convertTokenToId($0) } - } - - func convertIdsToTokens(_ ids: [Int]) -> [String?] { - ids.map { convertIdToToken($0) } - } - var bosToken: String? { nil } - var bosTokenId: Int? { nil } var eosToken: String? { nil } - var eosTokenId: Int? { nil } var unknownToken: String? { nil } - var unknownTokenId: Int? { nil } - var hasChatTemplate: Bool { true } - func applyChatTemplate(messages: [Tokenizers.Message]) throws -> [Int] { + func applyChatTemplate( + messages: [[String: any Sendable]], + tools _: [[String: any Sendable]]?, + additionalContext _: [String: any Sendable]? + ) throws -> [Int] { let combined = messages .compactMap { $0["content"] as? String } .joined(separator: " ") return encode(text: combined) } - - func applyChatTemplate( - messages: [Tokenizers.Message], - tools _: [Tokenizers.ToolSpec]? - ) throws -> [Int] { - try applyChatTemplate(messages: messages) - } - - func applyChatTemplate( - messages: [Tokenizers.Message], - tools _: [Tokenizers.ToolSpec]?, - additionalContext _: [String: any Sendable]? - ) throws -> [Int] { - try applyChatTemplate(messages: messages) - } - - func applyChatTemplate( - messages: [Tokenizers.Message], - chatTemplate _: Tokenizers.ChatTemplateArgument - ) throws -> [Int] { - try applyChatTemplate(messages: messages) - } - - func applyChatTemplate( - messages: [Tokenizers.Message], - chatTemplate _: String - ) throws -> [Int] { - try applyChatTemplate(messages: messages) - } - - func applyChatTemplate( - messages: [Tokenizers.Message], - chatTemplate _: Tokenizers.ChatTemplateArgument?, - addGenerationPrompt _: Bool, - truncation _: Bool, - maxLength _: Int?, - tools _: [Tokenizers.ToolSpec]? - ) throws -> [Int] { - try applyChatTemplate(messages: messages) - } - - func applyChatTemplate( - messages: [Tokenizers.Message], - chatTemplate _: Tokenizers.ChatTemplateArgument?, - addGenerationPrompt _: Bool, - truncation _: Bool, - maxLength _: Int?, - tools _: [Tokenizers.ToolSpec]?, - additionalContext _: [String: any Sendable]? - ) throws -> [Int] { - try applyChatTemplate(messages: messages) - } } private struct NaiveInputProcessor: UserInputProcessor { From 774943b19a393b03fd6a24b11c51f6404b8dedd3 Mon Sep 17 00:00:00 2001 From: Anthony Drendel Date: Sun, 5 Apr 2026 22:46:27 +0200 Subject: [PATCH 3/7] Upgrade mlx-swift-lm --- Package.resolved | 6 +++--- Package.swift | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Package.resolved b/Package.resolved index 5cfba05..2bc728d 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,5 +1,5 @@ { - "originHash" : "a396165c1f87035d1edacbeb5ab75ce114b510c28ecdb4f59826b74f189bf9e0", + "originHash" : "52f195d0f4b33995c883da76c3c392ba8efa2d2b0dc07d1ef3b281443317130b", "pins" : [ { "identity" : "mlx-swift", @@ -15,8 +15,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/shareup/mlx-swift-lm", "state" : { - "revision" : "d1b14783c93902b74c1211f480ece8f776f4c29c", - "version" : "0.0.12" + "revision" : "22deba9be9b3e815d339bc22f170b0248f4a2caf", + "version" : "0.0.13" } }, { diff --git a/Package.swift b/Package.swift index b4f25b3..b6bd673 100644 --- a/Package.swift +++ b/Package.swift @@ -17,7 +17,7 @@ let package = Package( ), .package( url: "https://github.com/shareup/mlx-swift-lm", - from: "0.0.12" + from: "0.0.13" ), .package( url: "https://github.com/DePasqualeOrg/swift-tokenizers", From 6b1bc38ef8fbc79cb875bdc0ec0f032acf9e3ff9 Mon Sep 17 00:00:00 2001 From: Anthony Drendel Date: Mon, 6 Apr 2026 14:26:01 +0200 Subject: [PATCH 4/7] Update mlx-swift-lm --- Package.resolved | 6 +++--- Package.swift | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Package.resolved b/Package.resolved index 2bc728d..4b0ade9 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,5 +1,5 @@ { - "originHash" : "52f195d0f4b33995c883da76c3c392ba8efa2d2b0dc07d1ef3b281443317130b", + "originHash" : "0e1ab4a07bf85e219a0e02bbc89d073a5568bda7a85e0c2b2b385df9881f40ce", "pins" : [ { "identity" : "mlx-swift", @@ -15,8 +15,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/shareup/mlx-swift-lm", "state" : { - "revision" : "22deba9be9b3e815d339bc22f170b0248f4a2caf", - "version" : "0.0.13" + "revision" : "8825bdd20fbcf982171972a661016817363ca495", + "version" : "0.0.14" } }, { diff --git a/Package.swift b/Package.swift index b6bd673..d9ee721 100644 --- a/Package.swift +++ b/Package.swift @@ -17,7 +17,7 @@ let package = Package( ), .package( url: "https://github.com/shareup/mlx-swift-lm", - from: "0.0.13" + from: "0.0.14" ), .package( url: "https://github.com/DePasqualeOrg/swift-tokenizers", From e2bdc1e6921cb22ac71901ec4f6bd30d217ad0a0 Mon Sep 17 00:00:00 2001 From: Anthony Drendel Date: Mon, 6 Apr 2026 14:35:25 +0200 Subject: [PATCH 5/7] Update CI --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6b2fbf7..ed4ccf9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,7 +8,7 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Select Xcode 16 - run: sudo xcode-select -s /Applications/Xcode_16.2.app + - name: Select Xcode 26 + run: sudo xcode-select -s /Applications/Xcode_26.3.app - name: Test run: ./bin/test.sh From 8e63bfbd1a3711bedf918d0e0cabe222fbfe981a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Apr 2026 13:22:11 +0000 Subject: [PATCH 6/7] Rename TokenizersLoader.swift to TokenizerLoader.swift Agent-Logs-Url: https://github.com/shareup/shllm/sessions/4e85f0d2-349b-4dd8-8452-5231b169b0f0 Co-authored-by: atdrendel <202402+atdrendel@users.noreply.github.com> --- Sources/SHLLM/{TokenizersLoader.swift => TokenizerLoader.swift} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Sources/SHLLM/{TokenizersLoader.swift => TokenizerLoader.swift} (100%) diff --git a/Sources/SHLLM/TokenizersLoader.swift b/Sources/SHLLM/TokenizerLoader.swift similarity index 100% rename from Sources/SHLLM/TokenizersLoader.swift rename to Sources/SHLLM/TokenizerLoader.swift From 177f0fe2be6a75e30efdd90b117e9480560e944c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Apr 2026 13:55:32 +0000 Subject: [PATCH 7/7] Wrap all Tokenizers.TokenizerError cases in applyChatTemplate Agent-Logs-Url: https://github.com/shareup/shllm/sessions/4ddd1be8-a557-4d98-a213-73bbc4f6a933 Co-authored-by: atdrendel <202402+atdrendel@users.noreply.github.com> --- Sources/SHLLM/TokenizerLoader.swift | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Sources/SHLLM/TokenizerLoader.swift b/Sources/SHLLM/TokenizerLoader.swift index ce4ebe8..0b04f27 100644 --- a/Sources/SHLLM/TokenizerLoader.swift +++ b/Sources/SHLLM/TokenizerLoader.swift @@ -47,10 +47,13 @@ private struct _Tokenizer: MLXLMCommon.Tokenizer { tools: tools, additionalContext: additionalContext ) - } catch let error as Tokenizers.TokenizerError - where error == .missingChatTemplate - { - throw MLXLMCommon.TokenizerError.missingChatTemplate + } catch let error as Tokenizers.TokenizerError { + switch error { + case .missingChatTemplate: + throw MLXLMCommon.TokenizerError.missingChatTemplate + default: + throw error as NSError + } } } }