Submit a Hexo Sitemap to Baidu with GitHub Actions
Posted onEdited on
Use a small Python script and a scheduled GitHub Actions workflow to submit your Hexo sitemap URLs to Baidu automatically.
When I looked at this blog’s rather modest page views, I started paying more attention to SEO. That was also the first time I logged into Baidu Search Resource Platform, only to discover that Baidu had indexed just 8 pages from my site.
No wonder almost all of my traffic was coming from Google and Bing. Baidu had barely indexed anything. Since Baidu does not provide a sitemap submission API like Google does, the only practical option is to submit URLs directly. So I put together a small workflow that lets this Hexo blog submit sitemap URLs to Baidu automatically through GitHub Actions.
Prepare the script
First, write a small Python script that downloads the site's sitemap.xml, extracts the URLs from it, and submits them to Baidu in batches:
@staticmethod
def gen_submit_url(site: str, token: str) -> str:
    """
    Generate the Baidu URL that links are submitted to.

    :param site: your site, e.g. https://knktc.com
    :param token: Baidu ziyuan token for that site
    :return: the submit endpoint with ``site`` and ``token`` as query parameters
    """
    # Both values are interpolated verbatim; no percent-encoding is applied.
    return f'http://data.zz.baidu.com/urls?site={site}&token={token}'
@staticmethod
def gen_sitemap_url(site: str, sitemap: str) -> str:
    """
    Generate the full URL used to download the sitemap.

    :param site: base address of the site, e.g. https://knktc.com
    :param sitemap: URL path of the sitemap file, e.g. sitemap.xml
    :return: ``sitemap`` resolved against ``site``
    """
    # urljoin resolves like a browser would: a relative path replaces the
    # last path segment of the base unless the base ends with a slash.
    return urljoin(site, sitemap)
@staticmethod
def get_links_from_sitemap(sitemap_url) -> list:
    """
    Download a sitemap, parse it and return the URLs it lists.

    Only ``<url>/<loc>`` entries directly under the root element, in the
    sitemaps.org 0.9 namespace, are read.

    :param sitemap_url: address of the sitemap XML document
    :return: list of URL strings, stripped of surrounding whitespace;
             empty ``<loc>`` elements are skipped
    """
    with request.urlopen(sitemap_url) as resp:
        data = resp.read()

    root = ET.fromstring(data)
    namespace = '{http://www.sitemaps.org/schemas/sitemap/0.9}'

    links = []
    for loc in root.findall(f'./{namespace}url/{namespace}loc'):
        # An empty <loc/> has text None, which would later break '\n'.join();
        # pretty-printed sitemaps may also pad the URL with whitespace.
        text = (loc.text or '').strip()
        if text:
            links.append(text)
    return links
@staticmethod
def submit(submit_url: str, links: list):
    """
    Submit a batch of links to Baidu.

    The links are joined with newlines, UTF-8 encoded and sent as the
    request body (passing ``data`` makes urllib issue a POST for HTTP URLs).

    :param submit_url: endpoint to send the links to
    :param links: list of URL strings to submit
    :return: the decoded response body
    """
    data = '\n'.join(links).encode('utf8')
    req = request.Request(submit_url, data=data)
    # Use a context manager so the response is closed even if read() fails.
    with request.urlopen(req) as resp:
        return resp.read().decode()
def run(self, chunk_size=20, sleep_time=0.1):
    """
    Run the submit process: fetch the sitemap links and submit them in chunks.

    :param chunk_size: chunk size handed to chunker() for each submission
    :param sleep_time: seconds to sleep after each submission; a falsy
                       value (0 or None) disables the pause
    """
    links = self.get_links_from_sitemap(self.sitemap_url)
    print(f'Get {len(links)} links from sitemap: [{self.sitemap_url}]')

    # presumably chunker() yields successive groups of at most chunk_size
    # links -- confirm against its definition
    for chunk in chunker(links, chunk_size):
        resp = self.submit(self.submit_url, chunk)
        print(resp)
        if sleep_time:
            time.sleep(sleep_time)

    # NOTE(review): the flattened source does not show whether this fixed
    # one-second pause belongs inside or after the loop; it is kept after
    # the loop here -- confirm against the original script.
    time.sleep(1)
def get_args(argv=None):
    """
    Parse the command line arguments.

    :param argv: optional list of argument strings to parse; when None
                 (the default) argparse reads them from sys.argv
    :return: argparse namespace with ``site``, ``token``, ``sitemap`` and
             ``chunk_size`` attributes
    """
    parser = argparse.ArgumentParser(description='Submit sitemap to Baidu')
    parser.add_argument('--site', '-s', type=str, dest='site', required=True,
                        help='your site, eg: https://knktc.com')
    parser.add_argument('--token', '-t', type=str, dest='token', required=True,
                        help='baidu ziyuan token, you may find your token in https://ziyuan.baidu.com/linksubmit')
    parser.add_argument('--sitemap', '-p', type=str, dest='sitemap', default='sitemap.xml',
                        help='url path to get sitemap.xml file, default: sitemap.xml')
    parser.add_argument('--chunk', '-c', type=int, dest='chunk_size', default=100,
                        help='how many urls should be submitted each time')

    args = parser.parse_args(argv)

    return args
defmain(): """ main process """ args = get_args() site = args.site token = args.token sitemap_path = args.sitemap chunk_size = args.chunk_size