import { GPT4OMINI_MODEL_SPEC } from "duck/graph/constants";
import { NonEmptyStringArray } from "duck/graph/types";
import { getLLM } from "duck/graph/utils";
import { runnableConfig } from "duck/ui/translate/agents/constants";
import { z } from "zod";

/**
 * Shape of the language-detection result: an object carrying the detected
 * language label as a string.
 */
type HasLanguage = {
  language: string;
};

/**
 * Non-English languages that the detection prompt names explicitly.
 * The list is interpolated into the prompt text and, through `completeList`,
 * becomes part of the set of labels the response schema accepts.
 */
const nonEnglishDetectableLanguages: NonEmptyStringArray = [
  "Spanish",
  "French",
  "German",
  "Portuguese",
  "Slovenian",
  "Japanese",
  "Latin", // We want to treat Latin as English. The most reliable way is to have the model identify it specifically. NOTE(review): no Latin-to-English mapping is visible in this file; presumably the caller handles it (confirm).
];

/**
 * Every label the model may answer with: "English", each explicitly named
 * non-English language, and the "Other" / "Unknown" fallbacks.
 * Used both to enumerate the allowed outputs in the prompt and as the value
 * set of the response schema's enum.
 */
const completeList: NonEmptyStringArray = [
  "English",
  ...nonEnglishDetectableLanguages,
  "Other",
  "Unknown",
];

/**
 * Builds the language-classification prompt for the given text.
 *
 * The prompt lists every allowed answer (`completeList`), gives guidance on
 * when to answer 'English', a named language, 'Other' or 'Unknown', and ends
 * with the text to classify.
 *
 * @param text - The text whose language should be classified; appended verbatim.
 * @returns The full prompt string, with lines separated by "\n".
 */
const getPrompt = (text: string) => {
  const allowedLabels = completeList.join(", ");
  const namedLanguages = nonEnglishDetectableLanguages.join(", ");

  // Whitespace-only details (the trailing space on the first line and the
  // two-space line before "Text:") are spelled out as literals so the prompt
  // stays byte-stable even if an editor strips trailing whitespace.
  const promptLines = [
    "You are a language detection model. Your purpose is to analyze the provided text and classify it by the language of the text. ",
    "Follow these guidelines:",
    `- Output exactly one of: ${allowedLabels}.`,
    "- If the text is composed mostly of English words or abbreviations (even if not in a full sentence), output 'English'.",
    `- If you identify one of the other named languages (${namedLanguages}), output that language.`,
    "- If you recognize a language that is not in the named list, output 'Other'.",
    "- Only respond with 'Unknown' if you truly cannot identify the language.",
    "  ",
    "Text:",
    `${text}`,
  ];

  return promptLines.join("\n");
};

// Enum field limited to the labels in `completeList`, carrying a
// human-readable description of what the field holds.
const languageField = z
  .enum(completeList)
  .describe("The language of the text");

/**
 * Schema for the model's structured response: a single `language` field
 * whose value must be one of the labels in `completeList`.
 */
const responseSchema = z.object({ language: languageField });

/**
 * Asks the language-detection model which language `text` is written in.
 *
 * Builds the classification prompt for `text`, wraps the model described by
 * `GPT4OMINI_MODEL_SPEC` so that its output is structured according to
 * `responseSchema`, and invokes it with the shared `runnableConfig`.
 * Errors from the underlying call are not caught here; they propagate to the
 * caller as a rejected promise.
 *
 * @param text - The text whose language should be determined.
 * @returns A promise resolving to an object whose `language` field holds the
 *   label produced by the model.
 */
const invokeDetermineLanguageAgent = async (
  text: string
): Promise<HasLanguage> => {
  // Build the prompt before touching the model, matching the original order.
  const languagePrompt = getPrompt(text);

  return getLLM(GPT4OMINI_MODEL_SPEC)
    .withStructuredOutput(responseSchema)
    .invoke(languagePrompt, runnableConfig);
};

export default invokeDetermineLanguageAgent;
