/*
 * Decompiled with CFR 0.152.
 */
package dev.langchain4j.model.jlama;

import com.github.tjake.jlama.model.AbstractModel;
import com.github.tjake.jlama.model.functions.Generator;
import com.github.tjake.jlama.safetensors.DType;
import com.github.tjake.jlama.safetensors.prompt.PromptSupport;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.internal.RetryUtils;
import dev.langchain4j.model.StreamingResponseHandler;
import dev.langchain4j.model.chat.StreamingChatLanguageModel;
import dev.langchain4j.model.jlama.JlamaLanguageModel;
import dev.langchain4j.model.jlama.JlamaModel;
import dev.langchain4j.model.jlama.JlamaModelRegistry;
import dev.langchain4j.model.jlama.spi.JlamaStreamingChatModelBuilderFactory;
import dev.langchain4j.model.output.FinishReason;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.model.output.TokenUsage;
import dev.langchain4j.spi.ServiceHelper;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.UUID;

public class JlamaStreamingChatModel
implements StreamingChatLanguageModel {
    private final AbstractModel model;
    private final Float temperature;
    private final Integer maxTokens;
    private final UUID id = UUID.randomUUID();

    public JlamaStreamingChatModel(Path modelCachePath, String modelName, String authToken, Integer threadCount, Boolean quantizeModelAtRuntime, Path workingDirectory, DType workingQuantizedType, Float temperature, Integer maxTokens) {
        JlamaModelRegistry registry = JlamaModelRegistry.getOrCreate(modelCachePath);
        JlamaModel jlamaModel = (JlamaModel)RetryUtils.withRetry(() -> registry.downloadModel(modelName, Optional.ofNullable(authToken)), (int)3);
        JlamaModel.Loader loader = jlamaModel.loader();
        if (quantizeModelAtRuntime != null && quantizeModelAtRuntime.booleanValue()) {
            loader = loader.quantized();
        }
        if (workingQuantizedType != null) {
            loader = loader.workingQuantizationType(workingQuantizedType);
        }
        if (threadCount != null) {
            loader = loader.threadCount(threadCount);
        }
        if (workingDirectory != null) {
            loader = loader.workingDirectory(workingDirectory);
        }
        this.model = loader.load();
        this.temperature = Float.valueOf(temperature == null ? 0.7f : temperature.floatValue());
        this.maxTokens = maxTokens == null ? this.model.getConfig().contextLength : maxTokens;
    }

    public static JlamaStreamingChatModelBuilder builder() {
        Iterator iterator = ServiceHelper.loadFactories(JlamaStreamingChatModelBuilderFactory.class).iterator();
        if (iterator.hasNext()) {
            JlamaStreamingChatModelBuilderFactory factory = (JlamaStreamingChatModelBuilderFactory)iterator.next();
            return (JlamaStreamingChatModelBuilder)factory.get();
        }
        return new JlamaStreamingChatModelBuilder();
    }

    public void generate(List<ChatMessage> messages, StreamingResponseHandler<AiMessage> handler) {
        if (this.model.promptSupport().isEmpty()) {
            throw new UnsupportedOperationException("This model does not support chat generation");
        }
        PromptSupport.Builder promptBuilder = ((PromptSupport)this.model.promptSupport().get()).builder();
        block7: for (ChatMessage message : messages) {
            switch (message.type()) {
                case SYSTEM: {
                    promptBuilder.addSystemMessage(message.text());
                    continue block7;
                }
                case USER: {
                    promptBuilder.addUserMessage(message.text());
                    continue block7;
                }
                case AI: {
                    promptBuilder.addAssistantMessage(message.text());
                    continue block7;
                }
            }
            throw new IllegalArgumentException("Unsupported message type: " + String.valueOf(message.type()));
        }
        try {
            Generator.Response r = this.model.generate(this.id, promptBuilder.build(), this.temperature.floatValue(), this.maxTokens.intValue(), (token, time) -> handler.onNext(token));
            handler.onComplete(Response.from((Object)AiMessage.from((String)r.responseText), (TokenUsage)new TokenUsage(Integer.valueOf(r.promptTokens), Integer.valueOf(r.generatedTokens)), (FinishReason)JlamaLanguageModel.toFinishReason(r.finishReason)));
        }
        catch (Throwable t) {
            handler.onError(t);
        }
    }

    public static class JlamaStreamingChatModelBuilder {
        private Path modelCachePath;
        private String modelName;
        private String authToken;
        private Integer threadCount;
        private Boolean quantizeModelAtRuntime;
        private Path workingDirectory;
        private DType workingQuantizedType;
        private Float temperature;
        private Integer maxTokens;

        public JlamaStreamingChatModelBuilder modelCachePath(Path modelCachePath) {
            this.modelCachePath = modelCachePath;
            return this;
        }

        public JlamaStreamingChatModelBuilder modelName(String modelName) {
            this.modelName = modelName;
            return this;
        }

        public JlamaStreamingChatModelBuilder authToken(String authToken) {
            this.authToken = authToken;
            return this;
        }

        public JlamaStreamingChatModelBuilder threadCount(Integer threadCount) {
            this.threadCount = threadCount;
            return this;
        }

        public JlamaStreamingChatModelBuilder quantizeModelAtRuntime(Boolean quantizeModelAtRuntime) {
            this.quantizeModelAtRuntime = quantizeModelAtRuntime;
            return this;
        }

        public JlamaStreamingChatModelBuilder workingDirectory(Path workingDirectory) {
            this.workingDirectory = workingDirectory;
            return this;
        }

        public JlamaStreamingChatModelBuilder workingQuantizedType(DType workingQuantizedType) {
            this.workingQuantizedType = workingQuantizedType;
            return this;
        }

        public JlamaStreamingChatModelBuilder temperature(Float temperature) {
            this.temperature = temperature;
            return this;
        }

        public JlamaStreamingChatModelBuilder maxTokens(Integer maxTokens) {
            this.maxTokens = maxTokens;
            return this;
        }

        public JlamaStreamingChatModel build() {
            return new JlamaStreamingChatModel(this.modelCachePath, this.modelName, this.authToken, this.threadCount, this.quantizeModelAtRuntime, this.workingDirectory, this.workingQuantizedType, this.temperature, this.maxTokens);
        }

        public String toString() {
            return "JlamaStreamingChatModel.JlamaStreamingChatModelBuilder(modelCachePath=" + String.valueOf(this.modelCachePath) + ", modelName=" + this.modelName + ", authToken=" + this.authToken + ", threadCount=" + this.threadCount + ", quantizeModelAtRuntime=" + this.quantizeModelAtRuntime + ", workingDirectory=" + String.valueOf(this.workingDirectory) + ", workingQuantizedType=" + String.valueOf(this.workingQuantizedType) + ", temperature=" + this.temperature + ", maxTokens=" + this.maxTokens + ")";
        }
    }
}

