datasources.truth.search_truth
Import scraped Truth Social data
"""
Import scraped Truth Social data
"""
import datetime

from backend.lib.search import Search
from common.lib.item_mapping import MappedItem


class SearchGab(Search):
    """
    Import scraped Truth Social data

    Datasets for this source are created by importing externally collected
    data; the class itself cannot run a search (see `get_items`).
    """
    # NOTE(review): the class is called SearchGab although everything in it
    # concerns Truth Social - presumably a leftover from another datasource.
    # The name is kept because other code may reference it.
    type = "truthsocial-search"  # job ID
    category = "Search"  # category
    title = "Import scraped Truth Social data"  # title displayed in UI
    description = "Import Truth Social data collected with an external tool such as Zeeschuimer."  # description displayed in UI
    extension = "ndjson"  # extension of result file, used internally and in UI
    is_from_zeeschuimer = True
    fake = ""

    # not available as a processor for existing datasets
    accepts = [None]

    def get_items(self, query):
        """
        Run custom search

        Not available for Truth Social

        :param query: Search parameters (unused)
        :raises NotImplementedError: Always
        """
        raise NotImplementedError("Truth Social datasets can only be created by importing data from elsewhere")

    @staticmethod
    def map_item(post):
        """
        Parse Truth Social post

        Flattens the nested post object into a dictionary of scalar values.
        Lists (mentions, hashtags, media URLs) are stored as comma-separated
        strings. Unknown media types do not abort the mapping; they are
        reported through the message of the returned MappedItem.

        :param dict post: Data as received from Truth Social
        :return MappedItem: Mapped item
        :raises KeyError: If a required field (e.g. `id`, `created_at`,
          `content` or one of the `account` fields) is missing
        :raises ValueError: If `created_at` does not match the expected
          `%Y-%m-%dT%H:%M:%S.%fZ` format
        """
        errors = []
        post_time = datetime.datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")

        images = []
        videos = []
        video_thumbs = []
        # `or []` covers both a missing key and an explicit null value
        for media in post.get("media_attachments") or []:
            mtype = media.get("type")
            if mtype == "image":
                images.append(media.get("url"))
            elif mtype == "video":
                videos.append(media.get("url"))
                video_thumbs.append(media.get("preview_url"))
            elif mtype == "tv":
                # Truth social has "TV channels" with videos
                # These do not have direct links to media
                # url is a thumbnail; preview_url is a smaller thumb
                video_thumbs.append(media.get("url"))
            else:
                errors.append(f"New media type: {mtype}")

        group = post.get("group") or {}

        # a quote belongs to the thread of the quoted post; a reply belongs
        # to the thread of the outermost post in its in_reply_to chain
        if post.get("quote_id"):
            thread_id = post.get("quote_id")
        elif post.get("in_reply_to"):
            reply_to = post.get("in_reply_to")
            while reply_to.get("in_reply_to"):
                reply_to = reply_to.get("in_reply_to")
            thread_id = reply_to.get("id")
        else:
            thread_id = post.get("id")

        mentions = [mention.get("username") for mention in post.get("mentions") or []]
        hashtags = [tag.get("name") for tag in post.get("tags") or []]

        mapped_item = {
            "id": post["id"],
            "created_at": post["created_at"],
            "body": post["content"],
            "url": post.get("url", None),
            "reblogs_count": post.get("reblogs_count", 0),
            "replies_count": post.get("replies_count", 0),

            "account_id": post["account"]["id"],
            "account_username": post["account"]["username"],
            "account_display_name": post["account"]["display_name"],
            "account_avatar": post["account"]["avatar"],
            "account_verified": post["account"]["verified"],
            "account_followers": post["account"]["followers_count"],
            "account_following": post["account"]["following_count"],

            # missing (None/empty) values are skipped: str.join() raises a
            # TypeError on anything that is not a string
            "mentions": ",".join(name for name in mentions if name),
            "hashtags": ",".join(tag for tag in hashtags if tag),

            # media
            "images": ",".join(url for url in images if url),
            "video_thumbs": ",".join(url for url in video_thumbs if url),
            "video_urls": ",".join(url for url in videos if url),

            # group
            "group_id": group.get("id", None),
            "group_display_name": group.get("display_name", None),
            "group_avatar": group.get("avatar", None),
            "group_note": group.get("note", None),
            "group_members_count": group.get("members_count", 0),

            "thread_id": thread_id,
            "timestamp": post_time.strftime("%Y-%m-%d %H:%M:%S")
        }

        return MappedItem(mapped_item, message="; ".join(errors))
class SearchGab(Search):
    """
    Import scraped Truth Social data

    Datasets for this source are created by importing externally collected
    data; the class itself cannot run a search (see `get_items`).
    """
    # NOTE(review): the class is called SearchGab although everything in it
    # concerns Truth Social - presumably a leftover from another datasource.
    # The name is kept because other code may reference it.
    type = "truthsocial-search"  # job ID
    category = "Search"  # category
    title = "Import scraped Truth Social data"  # title displayed in UI
    description = "Import Truth Social data collected with an external tool such as Zeeschuimer."  # description displayed in UI
    extension = "ndjson"  # extension of result file, used internally and in UI
    is_from_zeeschuimer = True
    fake = ""

    # not available as a processor for existing datasets
    accepts = [None]

    def get_items(self, query):
        """
        Run custom search

        Not available for Truth Social

        :param query: Search parameters (unused)
        :raises NotImplementedError: Always
        """
        raise NotImplementedError("Truth Social datasets can only be created by importing data from elsewhere")

    @staticmethod
    def map_item(post):
        """
        Parse Truth Social post

        Flattens the nested post object into a dictionary of scalar values.
        Lists (mentions, hashtags, media URLs) are stored as comma-separated
        strings. Unknown media types do not abort the mapping; they are
        reported through the message of the returned MappedItem.

        :param dict post: Data as received from Truth Social
        :return MappedItem: Mapped item
        :raises KeyError: If a required field (e.g. `id`, `created_at`,
          `content` or one of the `account` fields) is missing
        :raises ValueError: If `created_at` does not match the expected
          `%Y-%m-%dT%H:%M:%S.%fZ` format
        """
        errors = []
        post_time = datetime.datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")

        images = []
        videos = []
        video_thumbs = []
        # `or []` covers both a missing key and an explicit null value
        for media in post.get("media_attachments") or []:
            mtype = media.get("type")
            if mtype == "image":
                images.append(media.get("url"))
            elif mtype == "video":
                videos.append(media.get("url"))
                video_thumbs.append(media.get("preview_url"))
            elif mtype == "tv":
                # Truth social has "TV channels" with videos
                # These do not have direct links to media
                # url is a thumbnail; preview_url is a smaller thumb
                video_thumbs.append(media.get("url"))
            else:
                errors.append(f"New media type: {mtype}")

        group = post.get("group") or {}

        # a quote belongs to the thread of the quoted post; a reply belongs
        # to the thread of the outermost post in its in_reply_to chain
        if post.get("quote_id"):
            thread_id = post.get("quote_id")
        elif post.get("in_reply_to"):
            reply_to = post.get("in_reply_to")
            while reply_to.get("in_reply_to"):
                reply_to = reply_to.get("in_reply_to")
            thread_id = reply_to.get("id")
        else:
            thread_id = post.get("id")

        mentions = [mention.get("username") for mention in post.get("mentions") or []]
        hashtags = [tag.get("name") for tag in post.get("tags") or []]

        mapped_item = {
            "id": post["id"],
            "created_at": post["created_at"],
            "body": post["content"],
            "url": post.get("url", None),
            "reblogs_count": post.get("reblogs_count", 0),
            "replies_count": post.get("replies_count", 0),

            "account_id": post["account"]["id"],
            "account_username": post["account"]["username"],
            "account_display_name": post["account"]["display_name"],
            "account_avatar": post["account"]["avatar"],
            "account_verified": post["account"]["verified"],
            "account_followers": post["account"]["followers_count"],
            "account_following": post["account"]["following_count"],

            # missing (None/empty) values are skipped: str.join() raises a
            # TypeError on anything that is not a string
            "mentions": ",".join(name for name in mentions if name),
            "hashtags": ",".join(tag for tag in hashtags if tag),

            # media
            "images": ",".join(url for url in images if url),
            "video_thumbs": ",".join(url for url in video_thumbs if url),
            "video_urls": ",".join(url for url in videos if url),

            # group
            "group_id": group.get("id", None),
            "group_display_name": group.get("display_name", None),
            "group_avatar": group.get("avatar", None),
            "group_note": group.get("note", None),
            "group_members_count": group.get("members_count", 0),

            "thread_id": thread_id,
            "timestamp": post_time.strftime("%Y-%m-%d %H:%M:%S")
        }

        return MappedItem(mapped_item, message="; ".join(errors))
Import scraped Truth Social data
def
get_items(self, query):
def get_items(self, query):
    """
    Refuse to run a custom search

    Searching is not available for Truth Social, so calling this method
    always fails.

    :param query: Search parameters (unused)
    :raises NotImplementedError: Always
    """
    reason = "Truth Social datasets can only be created by importing data from elsewhere"
    raise NotImplementedError(reason)
Run custom search
Not available for Truth Social
@staticmethod
def
map_item(post):
@staticmethod
def map_item(post):
    """
    Parse Truth Social post

    Flattens the nested post object into a dictionary of scalar values.
    Lists (mentions, hashtags, media URLs) are stored as comma-separated
    strings. Unknown media types do not abort the mapping; they are
    reported through the message of the returned MappedItem.

    :param dict post: Data as received from Truth Social
    :return MappedItem: Mapped item
    :raises KeyError: If a required field (e.g. `id`, `created_at`,
      `content` or one of the `account` fields) is missing
    :raises ValueError: If `created_at` does not match the expected
      `%Y-%m-%dT%H:%M:%S.%fZ` format
    """
    errors = []
    post_time = datetime.datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")

    images = []
    videos = []
    video_thumbs = []
    # `or []` covers both a missing key and an explicit null value
    for media in post.get("media_attachments") or []:
        mtype = media.get("type")
        if mtype == "image":
            images.append(media.get("url"))
        elif mtype == "video":
            videos.append(media.get("url"))
            video_thumbs.append(media.get("preview_url"))
        elif mtype == "tv":
            # Truth social has "TV channels" with videos
            # These do not have direct links to media
            # url is a thumbnail; preview_url is a smaller thumb
            video_thumbs.append(media.get("url"))
        else:
            errors.append(f"New media type: {mtype}")

    group = post.get("group") or {}

    # a quote belongs to the thread of the quoted post; a reply belongs
    # to the thread of the outermost post in its in_reply_to chain
    if post.get("quote_id"):
        thread_id = post.get("quote_id")
    elif post.get("in_reply_to"):
        reply_to = post.get("in_reply_to")
        while reply_to.get("in_reply_to"):
            reply_to = reply_to.get("in_reply_to")
        thread_id = reply_to.get("id")
    else:
        thread_id = post.get("id")

    mentions = [mention.get("username") for mention in post.get("mentions") or []]
    hashtags = [tag.get("name") for tag in post.get("tags") or []]

    mapped_item = {
        "id": post["id"],
        "created_at": post["created_at"],
        "body": post["content"],
        "url": post.get("url", None),
        "reblogs_count": post.get("reblogs_count", 0),
        "replies_count": post.get("replies_count", 0),

        "account_id": post["account"]["id"],
        "account_username": post["account"]["username"],
        "account_display_name": post["account"]["display_name"],
        "account_avatar": post["account"]["avatar"],
        "account_verified": post["account"]["verified"],
        "account_followers": post["account"]["followers_count"],
        "account_following": post["account"]["following_count"],

        # missing (None/empty) values are skipped: str.join() raises a
        # TypeError on anything that is not a string
        "mentions": ",".join(name for name in mentions if name),
        "hashtags": ",".join(tag for tag in hashtags if tag),

        # media
        "images": ",".join(url for url in images if url),
        "video_thumbs": ",".join(url for url in video_thumbs if url),
        "video_urls": ",".join(url for url in videos if url),

        # group
        "group_id": group.get("id", None),
        "group_display_name": group.get("display_name", None),
        "group_avatar": group.get("avatar", None),
        "group_note": group.get("note", None),
        "group_members_count": group.get("members_count", 0),

        "thread_id": thread_id,
        "timestamp": post_time.strftime("%Y-%m-%d %H:%M:%S")
    }

    return MappedItem(mapped_item, message="; ".join(errors))
Parse Truth Social post
Parameters
- post: Data as received from Truth Social
Returns
Mapped item
Inherited Members
- backend.lib.worker.BasicWorker
- BasicWorker
- INTERRUPT_NONE
- INTERRUPT_RETRY
- INTERRUPT_CANCEL
- queue
- log
- manager
- interrupted
- modules
- init_time
- name
- run
- clean_up
- request_interrupt
- is_4cat_class
- backend.lib.search.Search
- max_workers
- prefix
- return_cols
- import_error_count
- import_warning_count
- process
- search
- import_from_file
- items_to_csv
- items_to_ndjson
- items_to_archive
- backend.lib.processor.BasicProcessor
- db
- job
- dataset
- owner
- source_dataset
- source_file
- config
- is_running_in_preset
- filepath
- work
- after_process
- remove_files
- abort
- iterate_proxied_requests
- push_proxied_request
- flush_proxied_requests
- iterate_archive_contents
- unpack_archive_contents
- extract_archived_file_by_name
- write_csv_items_and_finish
- write_archive_and_finish
- create_standalone
- save_annotations
- map_item_method_available
- get_mapped_item
- is_filter
- get_options
- get_status
- is_top_dataset
- is_from_collector
- get_extension
- is_rankable
- exclude_followup_processors
- is_4cat_processor