Python Code

def categorize_data(label, keyword, tf_data_df):
    """Split ``tf_data_df`` into rows whose comment mentions a keyword and the rest.

    Args:
        label: Value written into the ``type`` column of matched rows,
            replacing the placeholder ``'อื่นๆ'``. It is also printed.
        keyword: Iterable of substrings to look for in the ``comment``
            column. Matching is literal (not regex) and case-sensitive.
        tf_data_df: DataFrame that has at least ``comment`` and ``type``
            columns. It is not modified.

    Returns:
        A ``(categorized_df, uncategorized_df)`` tuple.
        ``categorized_df`` holds the matched rows grouped in keyword order,
        with exact duplicate rows removed and a fresh 0..n-1 index.
        ``uncategorized_df`` holds the rows that matched no keyword, with
        their original index.
    """
    print(label)

    comments = tf_data_df['comment']
    matched_any = pd.Series(False, index=tf_data_df.index)
    matched_frames = []

    for word in keyword:
        # regex=False keeps the literal-substring semantics of str.find;
        # na=False stops rows with a missing comment from counting as a match
        # (with str.find, NaN != -1 evaluated to True and matched everything).
        hits = comments.str.contains(word, regex=False, na=False).astype(bool)
        if hits.any():
            matched_frames.append(tf_data_df[hits])
            matched_any |= hits

    if matched_frames:
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        categorized_df = pd.concat(matched_frames, ignore_index=True)
    else:
        # No keyword (or no hit): empty frame with the same columns as the input.
        categorized_df = tf_data_df.iloc[0:0].copy()

    # A row that matches several keywords is collected once per keyword,
    # so drop the repeated copies.
    categorized_df = categorized_df.drop_duplicates(ignore_index=True)

    # Relabel rows still carrying the placeholder type with the requested label.
    categorized_df['type'] = categorized_df['type'].replace('อื่นๆ', label)

    # Everything that matched no keyword stays uncategorized.
    uncategorized_df = tf_data_df[~matched_any]

    return categorized_df, uncategorized_df

Python Code

Be the first to comment