You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
160 lines
4.3 KiB
160 lines
4.3 KiB
1 year ago
|
# %%
|
||
|
#from arena import Arena
|
||
|
from dotenv import load_dotenv
|
||
|
load_dotenv()
|
||
|
|
||
|
import os
|
||
|
import datetime
|
||
|
import requests
|
||
|
from bs4 import BeautifulSoup
|
||
|
import pytumblr
|
||
|
from markdownify import markdownify
|
||
|
|
||
|
#%%
|
||
|
# Log in to Tumblr
|
||
|
|
||
|
# Build the authenticated Tumblr client. The four OAuth credentials are read
# from the environment in the positional order passed to TumblrRestClient:
# consumer key, consumer secret, OAuth token, OAuth secret.
# A missing variable raises KeyError.
tumblr = pytumblr.TumblrRestClient(
    *[
        os.environ[variable]
        for variable in (
            'TUMBLR_CONS_KEY',
            'TUMBLR_CONS_SECRET',
            'TUMBLR_AUTH_TOKEN',
            'TUMBLR_AUTH_SECRET',
        )
    ]
)
|
||
|
|
||
|
# tumblr.info()
|
||
|
|
||
|
#%%
|
||
|
# Blocks waiting to be uploaded to Are.na; filled by the conversion loop.
arena_queue = list()

# Capture the import time once so every block carries the same stamp.
now = datetime.datetime.now()
timestamp = format(now, '%Y.%m.%d-%H:%M')
|
||
|
|
||
|
|
||
|
#%%
|
||
|
# Get posts from Tumblr
|
||
|
#tumblr.posts('the-air-pump', tag='CFS')
|
||
|
|
||
|
def get_all_posts(client, blog, tag):
    """Yield every post of ``blog`` carrying ``tag``, one page at a time.

    The posts endpoint is queried 20 posts per request, so this generator
    keeps advancing ``offset`` until a page comes back empty.

    Approach found here:
    https://stackoverflow.com/questions/47311845/print-more-than-20-posts-from-tumblr-api

    Args:
        client: Object exposing ``posts(blog, **params)`` that returns a
            mapping with a ``'posts'`` list (e.g. a pytumblr client).
        blog: Blog identifier to read from.
        tag: Only posts with this tag are requested.

    Yields:
        Each post mapping, in the order the API returns them.

    Raises:
        KeyError: If a response has no ``'posts'`` field.
    """
    page_size = 20
    offset = 0
    while True:
        # Use the client that was passed in rather than a module global, so
        # the function works with any client instance.
        response = client.posts(
            blog,
            limit=page_size,
            offset=offset,
            tag=tag,
            reblog_info=False,
            notes_info=False,
        )

        # Get the 'posts' field of the response
        posts = response['posts']

        # An empty page means everything has been consumed.
        if not posts:
            return

        yield from posts

        # move to the next offset
        offset += page_size
|
||
|
|
||
|
#%%
|
||
|
|
||
|
def import_metadata(post, imported_at=None):
    """Return the provenance footer appended to a block's description.

    Args:
        post: Tumblr post mapping; its ``'date'``, ``'post_url'`` and
            ``'type'`` entries are read.
        imported_at: Text to show as the import time. When omitted, the
            module-level ``timestamp`` is used, as before.

    Returns:
        A multi-line string starting with a separator rule, followed by the
        import time, the original post type and date, and the post URL.

    Raises:
        KeyError: If ``post`` lacks one of the entries listed above.
    """
    if imported_at is None:
        # Fall back to the run-wide stamp computed at script start.
        imported_at = timestamp

    date = post['date']
    link = post['post_url']
    t = post['type']
    return (
        '\n------------\n'
        f'Imported from Tumblr on: {imported_at} \n'
        f'Originally posted as a {t} on {date} \n'
        f'{link}'
    )
|
||
|
|
||
|
# %%
|
||
|
|
||
|
def _split_text_post(post_body):
    """Split a text post's HTML body into plain text and image URLs.

    Args:
        post_body: HTML string taken from a text post's ``'body'``.

    Returns:
        A ``(text_content, image_urls)`` tuple. ``text_content`` is the
        markdownified text of the direct children of every ``<p>`` outside
        a ``<figure>``: bare strings (each followed by a newline) and the
        ``href`` of ``<a>`` tags. ``image_urls`` lists the ``src`` of every
        ``<img>`` in the body.
    """
    # Imported here so the node classes are in scope without touching the
    # file-level import block.
    from bs4 import NavigableString, Tag

    soup = BeautifulSoup(post_body, 'lxml')

    # Collect images BEFORE stripping figures: the figures are removed from
    # this same tree, so any <img> nested in a <figure> would otherwise be
    # gone by the time we look for it.
    image_urls = [img['src'] for img in soup.find_all('img') if img.get('src')]

    # extract text without images
    for figure in soup.select('figure'):
        figure.extract()

    pieces = []
    for paragraph in soup.find_all('p'):
        for child in paragraph.contents:
            # Compare against the classes themselves; comparing type(child)
            # to a string is never true and left the text empty.
            if isinstance(child, NavigableString):
                pieces.append(str(child) + '\n')
            elif isinstance(child, Tag) and child.name == 'a':
                # An anchor without href contributes nothing instead of
                # raising KeyError.
                pieces.append(child.get('href', ''))

    return markdownify(''.join(pieces)), image_urls


# Convert each tagged Tumblr post into one or more Are.na block payloads and
# append them to arena_queue.
for post in get_all_posts(tumblr, 'the-air-pump', 'Internet Mythology'):

    if post['type'] == 'photo':
        # One image block per photo in the (possibly multi-photo) post.
        for photo in post['photos']:
            arena_post = {
                'source': photo['original_size']['url'],
                'description': markdownify(post['caption']),
                'title': post['summary'],
            }
            arena_post['description'] += import_metadata(post)
            arena_queue.append(arena_post)

    elif post['type'] == 'link':
        arena_post = {
            'source': post['url'],
            'description': '',
            'title': post['title'],
        }
        arena_post['description'] += import_metadata(post)
        arena_queue.append(arena_post)

    elif post['type'] == 'quote':
        text = markdownify(post['text'])
        source = markdownify(post['source'])
        arena_post = {
            'content': text + '\n - ' + 'Source: ' + source,
            'title': post['source'],
            'description': 'Source: ' + source,
        }
        arena_post['description'] += import_metadata(post)
        arena_queue.append(arena_post)

    elif post['type'] == 'text':
        text_content, image_urls = _split_text_post(post['body'])

        # extract all images and make each into a post
        for image_url in image_urls:
            arena_post = {
                'source': image_url,
                'title': post['title'],
                'description': 'Image extracted from Tumblr text post:\n' + text_content,
            }
            arena_post['description'] += import_metadata(post)
            arena_queue.append(arena_post)

        # Post with only text content
        arena_post = {
            'title': post['title'],
            'content': text_content,
            'description': '',
        }
        arena_post['description'] += import_metadata(post)
        arena_queue.append(arena_post)

    # XXX TODO: handle sound, video posts and others that are skipped

    else:
        # Report what was skipped so it can be imported by hand.
        print('----')
        print('Unsuported type: ' + post['type'])
        print(post['post_url'])
|
||
|
|
||
|
#print(arena_queue)
|
||
|
|
||
|
#%%
|
||
|
# Are.na posting
|
||
|
|
||
|
# Endpoint that creates a block in the "automation_inbox" channel.
url = "https://api.are.na/v2/channels/automation_inbox/blocks"

# The queue is posted in the reverse of the order it was collected in.
arena_queue.reverse()

print('Posting ' + str(len(arena_queue)) + ' blocks')

for block in arena_queue:
    block['access_token'] = os.environ['ARENA_TOKEN']
    # A timeout keeps one stalled connection from hanging the whole import.
    x = requests.post(url, data=block, timeout=30)
    # Report rejected blocks instead of discarding the response unseen;
    # the loop keeps going so one bad block does not stop the rest.
    if not x.ok:
        print('----')
        print('Failed to post block (HTTP ' + str(x.status_code) + '): ' + str(block.get('title')))

print('Done')
|