Edit on GitHub

datasources.xiaohongshu.search_rednote

Import scraped RedNote data

It's prohibitively difficult to scrape data from RedNote within 4CAT itself due to its aggressive rate limiting. Instead, import data collected elsewhere.

  1"""
  2Import scraped RedNote data
  3
  4It's prohibitively difficult to scrape data from RedNote within 4CAT itself due
  5to its aggressive rate limiting. Instead, import data collected elsewhere.
  6"""
  7import re
  8
  9from datetime import datetime
 10
 11from backend.lib.search import Search
 12from common.lib.item_mapping import MappedItem, MissingMappedField
 13from common.lib.helpers import normalize_url_encoding
 14
 15
 16class SearchRedNote(Search):
 17    """
 18    Import scraped RedNote/Xiaohongshu/XSH data
 19    """
 20    type = "xiaohongshu-search"  # job ID
 21    category = "Search"  # category
 22    title = "Import scraped RedNote data"  # title displayed in UI
 23    description = "Import RedNote data collected with an external tool such as Zeeschuimer."  # description displayed in UI
 24    extension = "ndjson"  # extension of result file, used internally and in UI
 25    is_from_zeeschuimer = True
 26
 27    # not available as a processor for existing datasets
 28    accepts = [None]
 29    references = [
 30        "[Zeeschuimer browser extension](https://github.com/digitalmethodsinitiative/zeeschuimer)",
 31        "[Worksheet: Capturing TikTok data with Zeeschuimer and 4CAT](https://tinyurl.com/nmrw-zeeschuimer-tiktok)"
 32    ]
 33
 34    def get_items(self, query):
 35        """
 36        Run custom search
 37
 38        Not available for RedNote
 39        """
 40        raise NotImplementedError("RedNote/Xiaohongshu datasets can only be created by importing data from elsewhere")
 41
 42
 43    @staticmethod
 44    def map_item(post):
 45        """
 46        Map XSH object to 4CAT item
 47
 48        Depending on whether the object was captured from JSON or HTML, treat it
 49        differently. A lot of data is missing from HTML objects.
 50
 51        :param post:
 52        :return:
 53        """
 54        if post.get("_zs-origin") == "html":
 55            return SearchRedNote.map_item_from_html(post)
 56        else:
 57            if "note" in post:
 58                return SearchRedNote.map_item_from_json_embedded(post)
 59            else:
 60                return SearchRedNote.map_item_from_json_api_explore(post)
 61
 62    @staticmethod
 63    def map_item_from_json_api_explore(post):
 64        """
 65        Map API-sourced XSH object to 4CAT item
 66
 67        Most straightforward - JSON objects from the XSH web API, which do
 68        however not always contain the same fields.
 69
 70        :param dict post:
 71        :return MappedItem:
 72        """
 73        item = post["note_card"] if post.get("type") != "video" else post
 74        item_id = post.get("id", post.get("note_id"))
 75
 76
 77        # Images
 78        images = []
 79        if item.get("image_list"):
 80            for image in item["image_list"]:
 81                if "url_default" in image and image["url_default"]:
 82                    images.append(image["url_default"])
 83                elif "info_list" in image and image["info_list"]:
 84                    for img_info in image["info_list"]:
 85                        found = False
 86                        if img_info.get("image_scene") == "WB_DFT":
 87                            images.append(img_info["url"])
 88                            found = True
 89                            break
 90                    if not found:
 91                        images.append(image["info_list"][0]["url"])
 92        elif item.get("cover"):
 93            images.append(item["cover"]["url_default"])
 94        else:
 95            # no image found;
 96            images = MissingMappedField("")       
 97
 98        # permalinks need this token to work, else you get a 404 not found
 99        xsec_bit = f"?xsec_token={post['xsec_token']}" if post.get("xsec_token") else ""
100        if item.get("video", {}).get("media"):
101            video_url = item["video"]["media"]["stream"]["h264"][0]["master_url"]
102        else:
103            video_url = MissingMappedField("")
104
105        timestamp = item.get("time", None)
106        return MappedItem({
107            "collected_from_url": normalize_url_encoding(post.get("__import_meta", {}).get("source_platform_url", "")),  # Zeeschuimer metadata
108            "id": item_id,
109            "thread_id": item_id,
110            "url": f"https://www.xiaohongshu.com/explore/{post['id']}{xsec_bit}",
111            "title": item.get("display_title", ""),
112            "body": item.get("desc", "") if "desc" in item else MissingMappedField(""),
113            "hashtags": ",".join(re.findall(r"#([^\s!@#$%^&*()_+{}:\"|<>?\[\];'\,./`~]+)", item["desc"])) if "desc" in item else MissingMappedField(""),
114            "timestamp": datetime.fromtimestamp(timestamp / 1000).strftime("%Y-%m-%d %H:%M:%S") if timestamp else MissingMappedField(""),
115            "author": item["user"]["nickname"],
116            "author_avatar_url": item["user"]["avatar"],
117            "image_urls": ",".join(images) if type(images) is list else images,
118            "video_url": video_url,
119            # only available when loading an individual post page, so skip
120            # "tags": ",".join(t["name"] for t in item["tag_list"]),
121            "likes": item["interact_info"]["liked_count"],
122            # "collects": item["interact_info"]["collected_count"],
123            # "comments": item["interact_info"]["comment_count"],
124            # "shares": item["interact_info"]["share_count"],
125            "unix_timestamp": int(timestamp / 1000) if timestamp else MissingMappedField(""),
126        })
127
128    @staticmethod
129    def map_item_from_json_embedded(item):
130        """
131        Map JSON object from an XHS HTML page
132
133        JSON objects from the HTML are formatted slightly differently, mostly
134        in that they use camelCase instead of underscores, but we can also
135        make a few more assumptions about the data
136
137        :param dict item:
138        :return MappedItem:
139        """
140        note = item["note"]
141        image = note["imageList"][0]["urlDefault"]
142        # permalinks need this token to work, else you get a 404 not found
143        xsec_bit = f"?xsec_token={note['xsecToken']}"
144        timestamp = note.get("time", None)
145
146        if "interactInfo" in note:
147            likes = note["interactInfo"]["likedCount"]
148        elif "interact_info" in note:
149            likes = note["interact_info"]["liked_count"]
150        elif "likes" in note:
151            likes = note["likes"]
152        else:
153            likes = MissingMappedField("")
154
155        return MappedItem({
156            "collected_from_url": normalize_url_encoding(item.get("__import_meta", {}).get("source_platform_url", "")),  # Zeeschuimer metadata
157            "id": item["id"],
158            "thread_id": item["id"],
159            "url": f"https://www.xiaohongshu.com/explore/{item['id']}{xsec_bit}",
160            "title": note.get("title", ""),
161            "body": note.get("desc", "") if "desc" in note else MissingMappedField(""),
162            "hashtags": ",".join(re.findall(r"#([^\s!@#$%^&*()_+{}:\"|<>?\[\];'\,./`~]+)", note["desc"])) if "desc" in note else MissingMappedField(""),
163            "timestamp": datetime.fromtimestamp(timestamp / 1000).strftime("%Y-%m-%d %H:%M:%S") if timestamp else MissingMappedField(""),
164            "author": note["user"]["nickname"],
165            "author_avatar_url": note["user"]["avatar"],
166            "image_url": image,
167            "video_url": MissingMappedField(""),
168            # only available when loading an individual post page, so skip
169            # "tags": ",".join(t["name"] for t in item["tag_list"]),
170            "likes": likes,
171            # "collects": item["interact_info"]["collected_count"],
172            # "comments": item["interact_info"]["comment_count"],
173            # "shares": item["interact_info"]["share_count"],
174            "unix_timestamp": int(timestamp / 1000) if timestamp else MissingMappedField(""),
175        })
176
177    def map_item_from_html(item):
178        """
179        Map pre-mapped item
180
181        These have been mapped by Zeeschuimer from the page HTML and contain
182        less data than JSON objects (but enough to be useful in some cases).
183
184        :param dict item:
185        :return MappedItem:
186        """
187        return MappedItem({
188            "collected_from_url": normalize_url_encoding(item.get("__import_meta", {}).get("source_platform_url", "")),  # Zeeschuimer metadata
189            "id": item["id"],
190            "thread_id": item["id"],
191            "url": f"https://www.xiaohongshu.com{item['url']}",
192            "title": item["title"],
193            "body": MissingMappedField(""),
194            "hashtags": MissingMappedField(""),
195            "timestamp": MissingMappedField(""),
196            "author": item["author_name"],
197            "author_avatar_url": item["author_avatar_url"],
198            "image_url": item["thumbnail_url"],
199            "video_url": MissingMappedField(""),
200            # "tags": MissingMappedField(""),
201            "likes": item["likes"],
202            # "collects": MissingMappedField(""),
203            # "comments": MissingMappedField(""),
204            # "shares": MissingMappedField(""),
205            "unix_timestamp": MissingMappedField(""),
206        })
class SearchRedNote(backend.lib.search.Search):
 17class SearchRedNote(Search):
 18    """
 19    Import scraped RedNote/Xiaohongshu/XSH data
 20    """
 21    type = "xiaohongshu-search"  # job ID
 22    category = "Search"  # category
 23    title = "Import scraped RedNote data"  # title displayed in UI
 24    description = "Import RedNote data collected with an external tool such as Zeeschuimer."  # description displayed in UI
 25    extension = "ndjson"  # extension of result file, used internally and in UI
 26    is_from_zeeschuimer = True
 27
 28    # not available as a processor for existing datasets
 29    accepts = [None]
 30    references = [
 31        "[Zeeschuimer browser extension](https://github.com/digitalmethodsinitiative/zeeschuimer)",
 32        "[Worksheet: Capturing TikTok data with Zeeschuimer and 4CAT](https://tinyurl.com/nmrw-zeeschuimer-tiktok)"
 33    ]
 34
 35    def get_items(self, query):
 36        """
 37        Run custom search
 38
 39        Not available for RedNote
 40        """
 41        raise NotImplementedError("RedNote/Xiaohongshu datasets can only be created by importing data from elsewhere")
 42
 43
 44    @staticmethod
 45    def map_item(post):
 46        """
 47        Map XSH object to 4CAT item
 48
 49        Depending on whether the object was captured from JSON or HTML, treat it
 50        differently. A lot of data is missing from HTML objects.
 51
 52        :param post:
 53        :return:
 54        """
 55        if post.get("_zs-origin") == "html":
 56            return SearchRedNote.map_item_from_html(post)
 57        else:
 58            if "note" in post:
 59                return SearchRedNote.map_item_from_json_embedded(post)
 60            else:
 61                return SearchRedNote.map_item_from_json_api_explore(post)
 62
 63    @staticmethod
 64    def map_item_from_json_api_explore(post):
 65        """
 66        Map API-sourced XSH object to 4CAT item
 67
 68        Most straightforward - JSON objects from the XSH web API, which do
 69        however not always contain the same fields.
 70
 71        :param dict post:
 72        :return MappedItem:
 73        """
 74        item = post["note_card"] if post.get("type") != "video" else post
 75        item_id = post.get("id", post.get("note_id"))
 76
 77
 78        # Images
 79        images = []
 80        if item.get("image_list"):
 81            for image in item["image_list"]:
 82                if "url_default" in image and image["url_default"]:
 83                    images.append(image["url_default"])
 84                elif "info_list" in image and image["info_list"]:
 85                    for img_info in image["info_list"]:
 86                        found = False
 87                        if img_info.get("image_scene") == "WB_DFT":
 88                            images.append(img_info["url"])
 89                            found = True
 90                            break
 91                    if not found:
 92                        images.append(image["info_list"][0]["url"])
 93        elif item.get("cover"):
 94            images.append(item["cover"]["url_default"])
 95        else:
 96            # no image found;
 97            images = MissingMappedField("")       
 98
 99        # permalinks need this token to work, else you get a 404 not found
100        xsec_bit = f"?xsec_token={post['xsec_token']}" if post.get("xsec_token") else ""
101        if item.get("video", {}).get("media"):
102            video_url = item["video"]["media"]["stream"]["h264"][0]["master_url"]
103        else:
104            video_url = MissingMappedField("")
105
106        timestamp = item.get("time", None)
107        return MappedItem({
108            "collected_from_url": normalize_url_encoding(post.get("__import_meta", {}).get("source_platform_url", "")),  # Zeeschuimer metadata
109            "id": item_id,
110            "thread_id": item_id,
111            "url": f"https://www.xiaohongshu.com/explore/{post['id']}{xsec_bit}",
112            "title": item.get("display_title", ""),
113            "body": item.get("desc", "") if "desc" in item else MissingMappedField(""),
114            "hashtags": ",".join(re.findall(r"#([^\s!@#$%^&*()_+{}:\"|<>?\[\];'\,./`~]+)", item["desc"])) if "desc" in item else MissingMappedField(""),
115            "timestamp": datetime.fromtimestamp(timestamp / 1000).strftime("%Y-%m-%d %H:%M:%S") if timestamp else MissingMappedField(""),
116            "author": item["user"]["nickname"],
117            "author_avatar_url": item["user"]["avatar"],
118            "image_urls": ",".join(images) if type(images) is list else images,
119            "video_url": video_url,
120            # only available when loading an individual post page, so skip
121            # "tags": ",".join(t["name"] for t in item["tag_list"]),
122            "likes": item["interact_info"]["liked_count"],
123            # "collects": item["interact_info"]["collected_count"],
124            # "comments": item["interact_info"]["comment_count"],
125            # "shares": item["interact_info"]["share_count"],
126            "unix_timestamp": int(timestamp / 1000) if timestamp else MissingMappedField(""),
127        })
128
129    @staticmethod
130    def map_item_from_json_embedded(item):
131        """
132        Map JSON object from an XHS HTML page
133
134        JSON objects from the HTML are formatted slightly differently, mostly
135        in that they use camelCase instead of underscores, but we can also
136        make a few more assumptions about the data
137
138        :param dict item:
139        :return MappedItem:
140        """
141        note = item["note"]
142        image = note["imageList"][0]["urlDefault"]
143        # permalinks need this token to work, else you get a 404 not found
144        xsec_bit = f"?xsec_token={note['xsecToken']}"
145        timestamp = note.get("time", None)
146
147        if "interactInfo" in note:
148            likes = note["interactInfo"]["likedCount"]
149        elif "interact_info" in note:
150            likes = note["interact_info"]["liked_count"]
151        elif "likes" in note:
152            likes = note["likes"]
153        else:
154            likes = MissingMappedField("")
155
156        return MappedItem({
157            "collected_from_url": normalize_url_encoding(item.get("__import_meta", {}).get("source_platform_url", "")),  # Zeeschuimer metadata
158            "id": item["id"],
159            "thread_id": item["id"],
160            "url": f"https://www.xiaohongshu.com/explore/{item['id']}{xsec_bit}",
161            "title": note.get("title", ""),
162            "body": note.get("desc", "") if "desc" in note else MissingMappedField(""),
163            "hashtags": ",".join(re.findall(r"#([^\s!@#$%^&*()_+{}:\"|<>?\[\];'\,./`~]+)", note["desc"])) if "desc" in note else MissingMappedField(""),
164            "timestamp": datetime.fromtimestamp(timestamp / 1000).strftime("%Y-%m-%d %H:%M:%S") if timestamp else MissingMappedField(""),
165            "author": note["user"]["nickname"],
166            "author_avatar_url": note["user"]["avatar"],
167            "image_url": image,
168            "video_url": MissingMappedField(""),
169            # only available when loading an individual post page, so skip
170            # "tags": ",".join(t["name"] for t in item["tag_list"]),
171            "likes": likes,
172            # "collects": item["interact_info"]["collected_count"],
173            # "comments": item["interact_info"]["comment_count"],
174            # "shares": item["interact_info"]["share_count"],
175            "unix_timestamp": int(timestamp / 1000) if timestamp else MissingMappedField(""),
176        })
177
178    def map_item_from_html(item):
179        """
180        Map pre-mapped item
181
182        These have been mapped by Zeeschuimer from the page HTML and contain
183        less data than JSON objects (but enough to be useful in some cases).
184
185        :param dict item:
186        :return MappedItem:
187        """
188        return MappedItem({
189            "collected_from_url": normalize_url_encoding(item.get("__import_meta", {}).get("source_platform_url", "")),  # Zeeschuimer metadata
190            "id": item["id"],
191            "thread_id": item["id"],
192            "url": f"https://www.xiaohongshu.com{item['url']}",
193            "title": item["title"],
194            "body": MissingMappedField(""),
195            "hashtags": MissingMappedField(""),
196            "timestamp": MissingMappedField(""),
197            "author": item["author_name"],
198            "author_avatar_url": item["author_avatar_url"],
199            "image_url": item["thumbnail_url"],
200            "video_url": MissingMappedField(""),
201            # "tags": MissingMappedField(""),
202            "likes": item["likes"],
203            # "collects": MissingMappedField(""),
204            # "comments": MissingMappedField(""),
205            # "shares": MissingMappedField(""),
206            "unix_timestamp": MissingMappedField(""),
207        })

Import scraped RedNote/Xiaohongshu/XSH data

type = 'xiaohongshu-search'
category = 'Search'
title = 'Import scraped RedNote data'
description = 'Import RedNote data collected with an external tool such as Zeeschuimer.'
extension = 'ndjson'
is_from_zeeschuimer = True
accepts = [None]
references = ['[Zeeschuimer browser extension](https://github.com/digitalmethodsinitiative/zeeschuimer)', '[Worksheet: Capturing TikTok data with Zeeschuimer and 4CAT](https://tinyurl.com/nmrw-zeeschuimer-tiktok)']
def get_items(self, query):
35    def get_items(self, query):
36        """
37        Run custom search
38
39        Not available for RedNote
40        """
41        raise NotImplementedError("RedNote/Xiaohongshu datasets can only be created by importing data from elsewhere")

Run custom search

Not available for RedNote

@staticmethod
def map_item(post):
44    @staticmethod
45    def map_item(post):
46        """
47        Map XSH object to 4CAT item
48
49        Depending on whether the object was captured from JSON or HTML, treat it
50        differently. A lot of data is missing from HTML objects.
51
52        :param post:
53        :return:
54        """
55        if post.get("_zs-origin") == "html":
56            return SearchRedNote.map_item_from_html(post)
57        else:
58            if "note" in post:
59                return SearchRedNote.map_item_from_json_embedded(post)
60            else:
61                return SearchRedNote.map_item_from_json_api_explore(post)

Map XSH object to 4CAT item

Depending on whether the object was captured from JSON or HTML, treat it differently. A lot of data is missing from HTML objects.

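Parameters
  • post: The captured object (a dict) to map.
Returns
  • The mapped item, as returned by whichever mapper matches the object.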
@staticmethod
def map_item_from_json_api_explore(post):
 63    @staticmethod
 64    def map_item_from_json_api_explore(post):
 65        """
 66        Map API-sourced XSH object to 4CAT item
 67
 68        Most straightforward - JSON objects from the XSH web API, which do
 69        however not always contain the same fields.
 70
 71        :param dict post:
 72        :return MappedItem:
 73        """
 74        item = post["note_card"] if post.get("type") != "video" else post
 75        item_id = post.get("id", post.get("note_id"))
 76
 77
 78        # Images
 79        images = []
 80        if item.get("image_list"):
 81            for image in item["image_list"]:
 82                if "url_default" in image and image["url_default"]:
 83                    images.append(image["url_default"])
 84                elif "info_list" in image and image["info_list"]:
 85                    for img_info in image["info_list"]:
 86                        found = False
 87                        if img_info.get("image_scene") == "WB_DFT":
 88                            images.append(img_info["url"])
 89                            found = True
 90                            break
 91                    if not found:
 92                        images.append(image["info_list"][0]["url"])
 93        elif item.get("cover"):
 94            images.append(item["cover"]["url_default"])
 95        else:
 96            # no image found;
 97            images = MissingMappedField("")       
 98
 99        # permalinks need this token to work, else you get a 404 not found
100        xsec_bit = f"?xsec_token={post['xsec_token']}" if post.get("xsec_token") else ""
101        if item.get("video", {}).get("media"):
102            video_url = item["video"]["media"]["stream"]["h264"][0]["master_url"]
103        else:
104            video_url = MissingMappedField("")
105
106        timestamp = item.get("time", None)
107        return MappedItem({
108            "collected_from_url": normalize_url_encoding(post.get("__import_meta", {}).get("source_platform_url", "")),  # Zeeschuimer metadata
109            "id": item_id,
110            "thread_id": item_id,
111            "url": f"https://www.xiaohongshu.com/explore/{post['id']}{xsec_bit}",
112            "title": item.get("display_title", ""),
113            "body": item.get("desc", "") if "desc" in item else MissingMappedField(""),
114            "hashtags": ",".join(re.findall(r"#([^\s!@#$%^&*()_+{}:\"|<>?\[\];'\,./`~]+)", item["desc"])) if "desc" in item else MissingMappedField(""),
115            "timestamp": datetime.fromtimestamp(timestamp / 1000).strftime("%Y-%m-%d %H:%M:%S") if timestamp else MissingMappedField(""),
116            "author": item["user"]["nickname"],
117            "author_avatar_url": item["user"]["avatar"],
118            "image_urls": ",".join(images) if type(images) is list else images,
119            "video_url": video_url,
120            # only available when loading an individual post page, so skip
121            # "tags": ",".join(t["name"] for t in item["tag_list"]),
122            "likes": item["interact_info"]["liked_count"],
123            # "collects": item["interact_info"]["collected_count"],
124            # "comments": item["interact_info"]["comment_count"],
125            # "shares": item["interact_info"]["share_count"],
126            "unix_timestamp": int(timestamp / 1000) if timestamp else MissingMappedField(""),
127        })

Map API-sourced XSH object to 4CAT item

Most straightforward - JSON objects from the XSH web API, which do however not always contain the same fields.

Parameters
  • dict post: The JSON object as captured from the XSH web API.
Returns
  • MappedItem: The mapped item.
@staticmethod
def map_item_from_json_embedded(item):
129    @staticmethod
130    def map_item_from_json_embedded(item):
131        """
132        Map JSON object from an XHS HTML page
133
134        JSON objects from the HTML are formatted slightly differently, mostly
135        in that they use camelCase instead of underscores, but we can also
136        make a few more assumptions about the data
137
138        :param dict item:
139        :return MappedItem:
140        """
141        note = item["note"]
142        image = note["imageList"][0]["urlDefault"]
143        # permalinks need this token to work, else you get a 404 not found
144        xsec_bit = f"?xsec_token={note['xsecToken']}"
145        timestamp = note.get("time", None)
146
147        if "interactInfo" in note:
148            likes = note["interactInfo"]["likedCount"]
149        elif "interact_info" in note:
150            likes = note["interact_info"]["liked_count"]
151        elif "likes" in note:
152            likes = note["likes"]
153        else:
154            likes = MissingMappedField("")
155
156        return MappedItem({
157            "collected_from_url": normalize_url_encoding(item.get("__import_meta", {}).get("source_platform_url", "")),  # Zeeschuimer metadata
158            "id": item["id"],
159            "thread_id": item["id"],
160            "url": f"https://www.xiaohongshu.com/explore/{item['id']}{xsec_bit}",
161            "title": note.get("title", ""),
162            "body": note.get("desc", "") if "desc" in note else MissingMappedField(""),
163            "hashtags": ",".join(re.findall(r"#([^\s!@#$%^&*()_+{}:\"|<>?\[\];'\,./`~]+)", note["desc"])) if "desc" in note else MissingMappedField(""),
164            "timestamp": datetime.fromtimestamp(timestamp / 1000).strftime("%Y-%m-%d %H:%M:%S") if timestamp else MissingMappedField(""),
165            "author": note["user"]["nickname"],
166            "author_avatar_url": note["user"]["avatar"],
167            "image_url": image,
168            "video_url": MissingMappedField(""),
169            # only available when loading an individual post page, so skip
170            # "tags": ",".join(t["name"] for t in item["tag_list"]),
171            "likes": likes,
172            # "collects": item["interact_info"]["collected_count"],
173            # "comments": item["interact_info"]["comment_count"],
174            # "shares": item["interact_info"]["share_count"],
175            "unix_timestamp": int(timestamp / 1000) if timestamp else MissingMappedField(""),
176        })

Map JSON object from an XHS HTML page

JSON objects from the HTML are formatted slightly differently, mostly in that they use camelCase instead of underscores, but we can also make a few more assumptions about the data

Parameters
  • dict item: The captured object, with the note data under its "note" key.
Returns
  • MappedItem: The mapped item.
def map_item_from_html(item):
178    def map_item_from_html(item):
179        """
180        Map pre-mapped item
181
182        These have been mapped by Zeeschuimer from the page HTML and contain
183        less data than JSON objects (but enough to be useful in some cases).
184
185        :param dict item:
186        :return MappedItem:
187        """
188        return MappedItem({
189            "collected_from_url": normalize_url_encoding(item.get("__import_meta", {}).get("source_platform_url", "")),  # Zeeschuimer metadata
190            "id": item["id"],
191            "thread_id": item["id"],
192            "url": f"https://www.xiaohongshu.com{item['url']}",
193            "title": item["title"],
194            "body": MissingMappedField(""),
195            "hashtags": MissingMappedField(""),
196            "timestamp": MissingMappedField(""),
197            "author": item["author_name"],
198            "author_avatar_url": item["author_avatar_url"],
199            "image_url": item["thumbnail_url"],
200            "video_url": MissingMappedField(""),
201            # "tags": MissingMappedField(""),
202            "likes": item["likes"],
203            # "collects": MissingMappedField(""),
204            # "comments": MissingMappedField(""),
205            # "shares": MissingMappedField(""),
206            "unix_timestamp": MissingMappedField(""),
207        })

Map pre-mapped item

These have been mapped by Zeeschuimer from the page HTML and contain less data than JSON objects (but enough to be useful in some cases).

Parameters
  • dict item: The object as pre-mapped by Zeeschuimer from the page HTML.
Returns
  • MappedItem: The mapped item.