datasources.truth.search_truth
Import scraped Truth Social data
"""
Import scraped Truth Social data
"""
import datetime
import re

from backend.lib.search import Search


class SearchGab(Search):
    """
    Import scraped Truth Social data

    NOTE(review): the class name "SearchGab" looks like a copy-paste
    artifact from the Gab datasource; kept as-is in case it is referenced
    by name elsewhere.
    """
    type = "truthsocial-search"  # job ID
    category = "Search"  # category
    title = "Import scraped Truth Social data"  # title displayed in UI
    description = "Import Truth Social data collected with an external tool such as Zeeschuimer."  # description displayed in UI
    extension = "ndjson"  # extension of result file, used internally and in UI
    is_from_zeeschuimer = True
    fake = ""

    # not available as a processor for existing datasets
    accepts = [None]

    def get_items(self, query):
        """
        Run custom search

        Not available for Truth Social; datasets are import-only.

        :param query:  Query parameters (unused)
        :raises NotImplementedError:  Always
        """
        raise NotImplementedError("Truth Social datasets can only be created by importing data from elsewhere")

    @staticmethod
    def map_item(post):
        """
        Parse Truth Social post

        :param post:  Data as received from Truth Social
        :return dict:  Mapped item
        """
        post_time = datetime.datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")

        # fields shared by posts with and without media attachments
        mapped_item = {
            "id": post["id"],
            "created_at": post["created_at"],
            "body": post["content"],
            "url": post.get("url", None),
            "reblogs_count": post.get("reblogs_count", 0),
            "replies_count": post.get("replies_count", 0),

            "account_id": post["account"]["id"],
            "account_username": post["account"]["username"],
            "account_display_name": post["account"]["display_name"],
            "account_avatar": post["account"]["avatar"],
            "account_verified": post["account"]["verified"],
            "account_followers": post["account"]["followers_count"],
            "account_following": post["account"]["following_count"],
        }

        # only posts with at least one attachment carry media fields, and
        # only the first attachment is mapped (matches previous behaviour)
        if post.get("media_attachments"):
            attachment = post["media_attachments"][0]
            mapped_item["media_id"] = attachment.get("id", None)
            mapped_item["media_type"] = attachment.get("type", None)
            mapped_item["media_url"] = attachment.get("url", None)
            mapped_item["media_preview_url"] = attachment.get("preview_url", None)

        mapped_item["thread_id"] = post["id"]
        mapped_item["timestamp"] = post_time.strftime("%Y-%m-%d %H:%M:%S")

        return mapped_item
class SearchGab(Search):
    """
    Import scraped Truth Social data

    NOTE(review): the class name "SearchGab" looks like a copy-paste
    artifact from the Gab datasource; kept as-is in case it is referenced
    by name elsewhere.
    """
    type = "truthsocial-search"  # job ID
    category = "Search"  # category
    title = "Import scraped Truth Social data"  # title displayed in UI
    description = "Import Truth Social data collected with an external tool such as Zeeschuimer."  # description displayed in UI
    extension = "ndjson"  # extension of result file, used internally and in UI
    is_from_zeeschuimer = True
    fake = ""

    # not available as a processor for existing datasets
    accepts = [None]

    def get_items(self, query):
        """
        Run custom search

        Not available for Truth Social; datasets are import-only.

        :param query:  Query parameters (unused)
        :raises NotImplementedError:  Always
        """
        raise NotImplementedError("Truth Social datasets can only be created by importing data from elsewhere")

    @staticmethod
    def map_item(post):
        """
        Parse Truth Social post

        :param post:  Data as received from Truth Social
        :return dict:  Mapped item
        """
        post_time = datetime.datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")

        # fields shared by posts with and without media attachments
        mapped_item = {
            "id": post["id"],
            "created_at": post["created_at"],
            "body": post["content"],
            "url": post.get("url", None),
            "reblogs_count": post.get("reblogs_count", 0),
            "replies_count": post.get("replies_count", 0),

            "account_id": post["account"]["id"],
            "account_username": post["account"]["username"],
            "account_display_name": post["account"]["display_name"],
            "account_avatar": post["account"]["avatar"],
            "account_verified": post["account"]["verified"],
            "account_followers": post["account"]["followers_count"],
            "account_following": post["account"]["following_count"],
        }

        # only posts with at least one attachment carry media fields, and
        # only the first attachment is mapped (matches previous behaviour)
        if post.get("media_attachments"):
            attachment = post["media_attachments"][0]
            mapped_item["media_id"] = attachment.get("id", None)
            mapped_item["media_type"] = attachment.get("type", None)
            mapped_item["media_url"] = attachment.get("url", None)
            mapped_item["media_preview_url"] = attachment.get("preview_url", None)

        mapped_item["thread_id"] = post["id"]
        mapped_item["timestamp"] = post_time.strftime("%Y-%m-%d %H:%M:%S")

        return mapped_item
Import scraped Truth Social data
def
get_items(self, query):
def get_items(self, query):
    """
    Run custom search

    Not available for Truth Social
    """
    # Truth Social data can only enter 4CAT via external import (e.g.
    # Zeeschuimer), so native searching is unsupported by design
    message = "Truth Social datasets can only be created by importing data from elsewhere"
    raise NotImplementedError(message)
Run custom search
Not available for Truth Social
@staticmethod
def
map_item(post):
34 @staticmethod 35 def map_item(post): 36 """ 37 Parse Truth Social post 38 39 :param node: Data as received from Truth Social 40 :return dict: Mapped item 41 """ 42 43 post_time = datetime.datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ") 44 media_length = 0 45 if "media_attachments" in post: 46 media_length = len(post["media_attachments"]) 47 48 if media_length > 0: 49 mapped_item = { 50 "id": post["id"], 51 "created_at": post["created_at"], 52 "body": post["content"], 53 "url": post.get("url", None), 54 "reblogs_count": post.get("reblogs_count", 0), 55 "replies_count": post.get("replies_count", 0), 56 57 "account_id": post["account"]["id"], 58 "account_username": post["account"]["username"], 59 "account_display_name": post["account"]["display_name"], 60 "account_avatar": post["account"]["avatar"], 61 "account_verified": post["account"]["verified"], 62 "account_followers": post["account"]["followers_count"], 63 "account_following": post["account"]["following_count"], 64 65 "media_id": post["media_attachments"][0].get("id", None), 66 "media_type": post["media_attachments"][0].get("type", None), 67 "media_url": post["media_attachments"][0].get("url", None), 68 "media_preview_url": post["media_attachments"][0].get("preview_url", None), 69 70 #"group_id": post["group"].get("id", None), 71 #"group_display_name": post["group"].get("display_name", None), 72 #"group_avatar": post["group"].get("avatar", None), 73 #"group_header": post["group"].get("header", None), 74 #"group_members_count": post["group"].get("members_count", 0), 75 76 "thread_id": post["id"], 77 "timestamp": post_time.strftime("%Y-%m-%d %H:%M:%S") 78 } 79 80 else: 81 mapped_item = { 82 "id": post["id"], 83 "created_at": post["created_at"], 84 "body": post["content"], 85 "url": post.get("url", None), 86 "reblogs_count": post.get("reblogs_count", 0), 87 "replies_count": post.get("replies_count", 0), 88 89 "account_id": post["account"]["id"], 90 "account_username": post["account"]["username"], 
91 "account_display_name": post["account"]["display_name"], 92 "account_avatar": post["account"]["avatar"], 93 "account_verified": post["account"]["verified"], 94 "account_followers": post["account"]["followers_count"], 95 "account_following": post["account"]["following_count"], 96 97 #"group_id": post["group"].get("id", None), 98 #"group_display_name": post["group"].get("display_name", None), 99 #"group_avatar": post["group"].get("avatar", None), 100 #"group_header": post["group"].get("header", None), 101 #"group_members_count": post["group"].get("members_count", 0), 102 103 "thread_id": post["id"], 104 "timestamp": post_time.strftime("%Y-%m-%d %H:%M:%S") 105 } 106 107 return mapped_item
Parse Truth Social post
Parameters
- post: Data as received from Truth Social
Returns
Mapped item
Inherited Members
- backend.lib.worker.BasicWorker
- BasicWorker
- INTERRUPT_NONE
- INTERRUPT_RETRY
- INTERRUPT_CANCEL
- queue
- log
- manager
- interrupted
- modules
- init_time
- name
- run
- clean_up
- request_interrupt
- is_4cat_class
- backend.lib.search.Search
- max_workers
- prefix
- return_cols
- import_error_count
- import_warning_count
- process
- search
- import_from_file
- items_to_csv
- items_to_ndjson
- items_to_archive
- backend.lib.processor.BasicProcessor
- db
- job
- dataset
- owner
- source_dataset
- source_file
- config
- is_running_in_preset
- filepath
- work
- after_process
- remove_files
- abort
- add_field_to_parent
- iterate_archive_contents
- unpack_archive_contents
- extract_archived_file_by_name
- write_csv_items_and_finish
- write_archive_and_finish
- create_standalone
- map_item_method_available
- get_mapped_item
- is_filter
- get_options
- get_status
- is_top_dataset
- is_from_collector
- get_extension
- is_rankable
- exclude_followup_processors
- is_4cat_processor