안녕하세요.  
	아래와 같이 LangChain의 UnstructuredHTMLLoader로 HTML 파일을 Document로 변환했는데, metadata의 source 값은 정상적으로 들어 있으나 page_content 값이 빈 문자열('')로 나옵니다.  
	 
	[소스코드]
 
	import json 
	import os 
	import subprocess 
	from langchain_community.document_loaders import UnstructuredHTMLLoader 
	from pathlib import Path 
	import base64 
	import http.client 
	from tqdm import tqdm 
	import requests
 
	# Directory that holds the HTML guide pages to convert.
	html_files_dir = Path('/home/embeding/clovastudioguide')

	# Every *.html file directly inside that directory (the pattern is not recursive).
	html_files = [*html_files_dir.glob("*.html")]

	# One entry per processed file; each entry is the value returned by load()
	# for that file, so the per-file results stay grouped rather than flattened.
	clovastudiodatas = []

	for html_file in html_files:
	    # The loader is constructed with a string path, so convert the Path first.
	    document_data = UnstructuredHTMLLoader(str(html_file)).load()
	    # Dump the raw result so page_content / metadata can be inspected per file.
	    print(document_data)
	    clovastudiodatas.append(document_data)
	    print(f"Processed {html_file}")
	 
 
	[실행결과] 
	[Document(page_content='', metadata={'source': '/home/embeding/clovastudioguide/clovastudio-glossary.html'})] 
	Processed /home/embeding/clovastudioguide/clovastudio-glossary.html 
	[Document(page_content='', metadata={'source': '/home/embeding/clovastudioguide/clovastudio-screen.html'})] 
	Processed /home/embeding/clovastudioguide/clovastudio-screen.html 
	[Document(page_content='', metadata={'source': '/home/embeding/clovastudioguide/clovastudio-info.html'})] 
	Processed /home/embeding/clovastudioguide/clovastudio-info.html 
	[Document(page_content='', metadata={'source': '/home/embeding/clovastudioguide/clovastudio-playground.html'})] 
	Processed /home/embeding/clovastudioguide/clovastudio-playground.html 
	[Document(page_content='', metadata={'source': '/home/embeding/clovastudioguide/clovastudio-start.html'})] 
	Processed /home/embeding/clovastudioguide/clovastudio-start.html 
	[Document(page_content='', metadata={'source': '/home/embeding/clovastudioguide/clovastudio-procedure.html'})] 
	Processed /home/embeding/clovastudioguide/clovastudio-procedure.html 
	[Document(page_content='', metadata={'source': '/home/embeding/clovastudioguide/clovastudio-playground01.html'})]