package kr.shineware.nlp.komoran.core.tokenizer;

import java.util.ArrayList;
import java.util.List;
import kr.peopleware.util.common.string.StringUtil;
import kr.shineware.nlp.komoran.core.tokenizer.constant.TOKENTYPE;
import kr.shineware.nlp.komoran.core.tokenizer.model.Token;

/* loaded from: input_file:kr/shineware/nlp/komoran/core/tokenizer/Tokenizer.class */
/**
 * Splits a string into {@link Token}s by grouping consecutive characters that
 * share the same {@link TOKENTYPE}.
 *
 * <p>The {@code data} property is a plain holder exposed through
 * {@link #getData()} / {@link #setData(String)}; {@link #compile(String)} does
 * not read it and works only on its argument.
 */
public class Tokenizer {
    private String data;

    /**
     * Returns the string previously stored with {@link #setData(String)}.
     *
     * @return the stored string, or {@code null} if none has been set
     */
    public String getData() {
        return this.data;
    }

    /**
     * Stores a string on this tokenizer.
     *
     * @param str the string to store
     */
    public void setData(String str) {
        this.data = str;
    }

    /**
     * Tokenizes {@code str}.
     *
     * <p>The input is split on {@code " "} via {@code StringUtil.split}. Within
     * each resulting piece, consecutive characters of the same type are merged
     * into one token, except that every {@code TOKENTYPE.ETC} character always
     * becomes a token of its own. After each piece (including the last one) a
     * {@code " "} token of type {@code TOKENTYPE.SPACE} is appended.
     *
     * @param str the text to tokenize; it is handed to {@code StringUtil.split}
     *            unchecked, so {@code null} handling is whatever that helper does
     * @return the tokens in input order
     */
    public List<Token> compile(String str) {
        List<Token> tokens = new ArrayList<Token>();
        for (String word : StringUtil.split(str, " ")) {
            StringBuilder run = new StringBuilder();
            TOKENTYPE runType = null;
            int wordLength = word.length();
            for (int i = 0; i < wordLength; i++) {
                char ch = word.charAt(i);
                TOKENTYPE charType = getTokenType(ch);
                // A new run starts when the type changes, and always on an ETC
                // character so that ETC characters are never merged together.
                boolean startsNewRun =
                        charType == TOKENTYPE.ETC || (runType != null && charType != runType);
                if (startsNewRun) {
                    // runType is null only for the first character of the word,
                    // in which case there is no pending run to flush.
                    if (runType != null) {
                        tokens.add(new Token(run.toString(), runType));
                    }
                    run.setLength(0);
                }
                run.append(ch);
                runType = charType;
            }
            // Flush the last run of the word; an empty word produces no token.
            if (run.length() != 0) {
                tokens.add(new Token(run.toString(), runType));
            }
            tokens.add(new Token(" ", TOKENTYPE.SPACE));
        }
        return tokens;
    }

    /**
     * Classifies a single character.
     *
     * <p>The checks run in this order: Korean, then English/Chinese/foreign
     * (all reported as {@code FOREIGN}), then numeric ({@code SN}); anything
     * else is {@code ETC}.
     *
     * @param c the character to classify
     * @return the token type for {@code c}
     */
    private TOKENTYPE getTokenType(char c) {
        if (StringUtil.isKorean(c)) {
            return TOKENTYPE.KOREAN;
        }
        if (StringUtil.isEnglish(c) || StringUtil.isChinese(c) || StringUtil.isForeign(c)) {
            return TOKENTYPE.FOREIGN;
        }
        if (StringUtil.isNumeric(c)) {
            return TOKENTYPE.SN;
        }
        return TOKENTYPE.ETC;
    }
}
