diff --git a/Libraries/LLM/Tokenizer.swift b/Libraries/LLM/Tokenizer.swift index 0159bec..72d7960 100644 --- a/Libraries/LLM/Tokenizer.swift +++ b/Libraries/LLM/Tokenizer.swift @@ -67,54 +67,13 @@ public func loadTokenizer(configuration: ModelConfiguration) async throws -> Tok tokenizerConfig = Config(dictionary) } - // workaround: some merges can't be split on space in BPETokenizer - if let tokenizerClass = tokenizerConfig.tokenizerClass?.stringValue { - switch tokenizerClass { - case "T5Tokenizer": - break - default: - tokenizerData = discardUnhandledMerges(tokenizerData: tokenizerData) - } - } - let impl = try PreTrainedTokenizer( tokenizerConfig: tokenizerConfig, tokenizerData: tokenizerData) return Tokenizer(tokenizer: impl, tokenizerConfig: tokenizerConfig) } -public func discardUnhandledMerges(tokenizerData: Config) -> Config { - // see https://github.com/ml-explore/mlx-swift-examples/issues/1 - // and https://github.com/huggingface/swift-transformers/issues/51 - - if let model = tokenizerData.model { - if let merges = model.dictionary["merges"] as? [String] { - // discard any merges that can't be split on a space - // (required by BPETokenizer) - let newMerges = - merges - .filter { - $0.split(separator: " ").count == 2 - } - - if newMerges.count != merges.count { - var newModel = model.dictionary - newModel["merges"] = newMerges - - var newTokenizerData = tokenizerData.dictionary - newTokenizerData["model"] = newModel - - return Config(newTokenizerData) - } - } - } - - return tokenizerData -} - /// overrides for TokenizerModel/knownTokenizers let replacementTokenizers = [ - "CodeLlamaTokenizer": "LlamaTokenizer", - "GemmaTokenizer": "PreTrainedTokenizer", "Qwen2Tokenizer": "PreTrainedTokenizer", ] diff --git a/mlx-swift-examples.xcodeproj/project.pbxproj b/mlx-swift-examples.xcodeproj/project.pbxproj index e680b1d..97f0715 100644 --- a/mlx-swift-examples.xcodeproj/project.pbxproj +++ b/mlx-swift-examples.xcodeproj/project.pbxproj @@ -2220,8 +2220,8 @@ isa = XCRemoteSwiftPackageReference; repositoryURL = "https://github.com/huggingface/swift-transformers"; requirement = { - kind = upToNextMajorVersion; - minimumVersion = 0.1.2; + branch = main; + kind = branch; }; }; C392736E2B60699100368D5D /* XCRemoteSwiftPackageReference "swift-argument-parser" */ = { diff --git a/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index 4b4f79e..75af8d9 100644 --- a/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -59,8 +59,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/huggingface/swift-transformers", "state" : { - "revision" : "564442fba36b0b694d730a62d0593e5f54043b55", - "version" : "0.1.2" + "branch" : "main", + "revision" : "24605a8c0cc974bec5b94a6752eb687bae77db31" } } ],