-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBlogBuilder.py
254 lines (218 loc) · 9.48 KB
/
BlogBuilder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
"""
This file should sit in the directory that contains your "www" directory. By default, it reads text files from the "toproc" directory and converts them to HTML.
"""
import glob
import os
from lxml import html
############################
# New Article HTML Builder #
############################
def generateHtml(document):
    '''
    Generate a standalone HTML article page from a tagged input document.

    The input document is basic HTML markup plus optional tag lines. Any line
    containing "METATAGS:", "TITLE:" or "DATE:" is consumed as a tag: the text
    after the marker becomes the tag value and the line is left out of the
    article body. Every other line is copied through unchanged.

    The page is written to www/blog/<filename>.html, where <filename> is the
    title with spaces replaced by hyphens and any leading/trailing '.', '?'
    and '!' characters removed.

    Parameters:
        document: path of the input text file.

    Returns:
        A (filename, filename + ".html") tuple: the hyphenated title and the
        name of the HTML file that was written.

    Raises:
        SystemExit: with status 1 when no usable TITLE: line was found.
    '''
    # Read the whole document and make sure the file handle is closed.
    with open(document, 'r') as source:
        content = source.read()

    # Defaults used when the corresponding tag line is absent.
    meta_tag = "My Blog"
    title_tag = None
    date_tag = ""

    # Separate tag lines from article lines. A new list is built instead of
    # removing items from the list being iterated, so tag lines are recognised
    # even when they directly follow one another. Splitting on the marker
    # itself (at most once) keeps values that contain ':' intact, e.g. a date
    # with a time of day.
    article_lines = []
    for sentence in content.split('\n'):
        if "METATAGS:" in sentence:
            meta_tag = sentence.split("METATAGS:", 1)[1].strip().title()
        elif "TITLE:" in sentence:
            title_tag = sentence.split("TITLE:", 1)[1].strip().title()
        elif "DATE:" in sentence:
            date_tag = sentence.split("DATE:", 1)[1].strip()
        else:
            article_lines.append(sentence)

    # Derive the output file name from the title. All three punctuation
    # characters are stripped in a single pass so that endings such as "?!"
    # are removed completely.
    filename = ""
    if title_tag:
        filename = title_tag.replace(" ", "-").strip().strip('.?!')

    # Without a usable title there is no file name to write to, so stop
    # before producing any output.
    if not filename:
        print("No title was set. Exiting.")
        raise SystemExit(1)

    #
    # Tags found above are inserted into the header below (replacing the '{}'
    # placeholders through the format() string method).
    #
    head = """<!DOCTYPE html>
<!-- This code was generated by BlogBuilder.py, which was written by Max Lee of Maxya IT. Email me to learn more. -->
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width">
<meta name="description" content="{}">
<meta type='tags' content='{}'>
<title>Your Blog Title{}</title>
<meta name="author" content="WHatever Your Name Is">
<link rel="stylesheet" href="css/style.css">
</head>\n""".format(meta_tag, meta_tag, title_tag)

    #
    # Site banner. Notice the div with id "branding", which you can style
    # using CSS.
    # NOTE(review): this <header> markup is emitted before the opening <body>
    # tag that is added further down.
    #
    header = """ <header>
<div class="container">
<a href='http://www.yoursitename.com'>
<div id="branding">
<img src="./img/sitelogo.png" alt="Site Logo here">
<h1>Site name here</h1>
</div>
</a>
<nav>
<ul>
<li class="current"><a href="http://yoursitename.com/blog/index.html">Blog Home</a></li>
</ul>
</nav>
</div>
</header>"""

    #
    # The footer holds the analytics snippet and copyright notice; you will
    # definitely want to change those to fit your setup. It also contains the
    # "next_article_menu" list, which is updated automatically when the next
    # article is added later.
    #
    footer = """
<footer>
<ul class="next_article_menu">
<li><a href="http://yoursitename.com/blog/index.html">Blog Home</a></li>
</ul>
<p>Your company/site name, Copyright 2018</p>
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-1116123414af8253143123f3422-1"></script>
<script>
window.dataLayer = wisndow.dataLayer || [];
functsion gtag(){dataLayer.pussh(arguments);}
gtasg('js', new Dsate());
gtasg('config', 'UAs-11923452-2311');
</script>
</footer>
</body>
</html>"""

    # Assemble the page: head, banner, dated article body, then footer.
    body = head
    body += header
    body += "<body><div id='date'>Article date: {}</div>\n<div class='article_body'>\n".format(date_tag)
    body += "\n".join(article_lines)
    body += "</div>"
    body += "\n" + footer

    with open('www/blog/{}.html'.format(filename), 'w') as f:
        f.write(body)
    return filename, filename + ".html"
######################
# Blog Index updater #
######################
def updateIndex(output_url, title_tag):
    '''
    Add the new article to www/blog/index.html and refresh its timestamp.

    Assumes a directory structure of "www" ---> "blog" ---> "index.html",
    where index.html holds the list of all blog articles as <li> items inside
    a <ul> that is a direct child of a <div class="container">, plus a
    <div id="update_string"> that receives the "Last updated on ..." text.

    Parameters:
        output_url: href to use for the new index entry (the article's file
            name as returned by generateHtml).
        title_tag: hyphenated article title; hyphens are turned back into
            spaces for the link text.

    Returns:
        The href of the link in the entry that was last in the list before
        the new article was added, i.e. the previous article.

    Raises:
        SystemExit: with status 1 when the index has no entries, when the
            last entry carries no link, or when the insertion cannot be
            confirmed.
    '''
    import datetime

    item_xpath = "//div[@class='container']/ul/li"

    # Parse the current index; the context manager closes the file handle.
    with open('www/blog/index.html', 'r') as indexfile:
        doc = html.document_fromstring(indexfile.read())

    entries = doc.xpath(item_xpath)
    listlength = len(entries)
    if not entries:
        # The new item is inserted after the last existing one, so an empty
        # list cannot be extended.
        print("No existing entries were found in the blog index. Exiting...")
        raise SystemExit(1)

    # Resolve the previous article's URL before changing anything. Reading it
    # from the last entry's own link by attribute name does not depend on
    # attribute order or on every entry having exactly one link.
    previous_entry = entries[-1]
    previous_link = previous_entry.find('a')
    previous_blog = None if previous_link is None else previous_link.get('href')
    if previous_blog is None:
        print("The last entry in the blog index has no article link. Exiting...")
        raise SystemExit(1)

    # Add a list item for the new article after the current last entry.
    entry_date = datetime.date.today().isoformat()
    title_tag = title_tag.replace('-', ' ')
    new_item = html.fragment_fromstring(' <li>{} :: <a href = "{}">{}</a></li>'.format(entry_date, output_url, title_tag))
    previous_entry.addnext(new_item)

    # Test that the addition was successful...
    new_length = len(doc.xpath(item_xpath))
    print("length of list after adding article: ", new_length)
    if new_length != (listlength + 1):
        print("Something went wrong when trying to add new article to the index. Exiting...")
        raise SystemExit(1)

    # Replace the "last updated" string.
    #
    # The label below says U.S. Mountain time, but the timestamp is simply the
    # local time of the machine running this script. Update the label to fit
    # your situation.
    #
    now = datetime.datetime.today()
    date_string = 'Last updated on {:%A, %B %d %Y, at %I:%M %p, U.S. Mountain Time}'.format(now)
    doc.xpath("//div[@id='update_string']")[0].text = date_string

    # Serialize the modified tree and overwrite the index.
    outfile = html.tostring(doc, pretty_print=True, encoding='unicode')
    with open('www/blog/index.html', 'w') as f:
        f.write(outfile)

    return previous_blog
############################
# Previous Article Updater #
############################
def updatePreviousHtml(previous_blog, output_url, title, base_url='http://yoursitenamehere.com/blog/'):
    '''
    Link the previous article to the newly created one.

    Opens the previous article's HTML under www/blog/, appends a "Next: ..."
    item to the footer list with class "next_article_menu", and writes the
    file back. This is what enables automatic forward linking of articles.

    TODO: Add a "previous article" feature. Right now progress is only
    forward; there is no option (other than the browser's back button) to go
    to the previous article.

    Parameters:
        previous_blog: file name of the previous article inside www/blog/.
        output_url: file name of the newly created article.
        title: hyphenated title of the new article; hyphens are shown as
            spaces in the link text, matching the blog index entries.
        base_url: URL prefix placed in front of output_url in the link.

    Returns:
        True once the previous article has been rewritten.

    Raises:
        SystemExit: with status 1 when the menu list is missing or empty, or
            when the insertion cannot be confirmed.
    '''
    menu_xpath = "//ul[@class='next_article_menu']/li"
    previous_path = "www/blog/{}".format(previous_blog)

    # Open previous blog and convert it to an lxml tree for manipulation.
    with open(previous_path, 'r') as f:
        parsed = html.document_fromstring(f.read())

    # Items currently in the footer menu; the new link goes after the last.
    menu_section = parsed.xpath(menu_xpath)
    length = len(menu_section)
    if not menu_section:
        # Nothing to insert after: report it instead of failing on an index.
        print("Something went wrong when linking previous article to newest article. Exiting...")
        raise SystemExit(1)

    # Create fragment for insertion.
    link_text = title.replace('-', ' ')
    newlink = html.fragment_fromstring(" <li><a href='{}{}'>Next: {}</a></li>".format(base_url, output_url, link_text))

    # Insert fragment.
    print("pre-add length: ", length)
    menu_section[-1].addnext(newlink)
    new_length = len(parsed.xpath(menu_xpath))
    print("post-add length: ", new_length)

    # Check that insert was successful.
    if (length + 1) != new_length:
        print("Something went wrong when linking previous article to newest article. Exiting...")
        raise SystemExit(1)

    # Convert to string and write the updated article back out.
    outfile = html.tostring(parsed, pretty_print=True, encoding='unicode')
    with open(previous_path, 'w') as f:
        f.write(outfile)
    return True
########################
# Main Program Section #
########################
inputdir = 'toproc'
# Find files to be processed. glob returns matches in a filesystem-dependent
# order, so sort them: each article is linked from the one indexed before it,
# and a stable order keeps that chain reproducible.
to_proc = sorted(glob.glob('{}/*.txt'.format(inputdir)))
# Most of the time, this will have a single file to process, but just in case...
for document in to_proc:
    # Build the article page, register it in the index, then link the
    # previous article forward to it.
    title, output_url = generateHtml(document)
    previous_blog = updateIndex(output_url, title)
    updatePreviousHtml(previous_blog, output_url, title)
    print("Completed processing article. Please rsync blog directory with server.")
# Now cleanup our processed input articles...
print("Cleaning up input files")
# Only the documents that were processed above are 'unlinked' (deleted);
# any other files in the input directory are left alone.
for document in to_proc:
    os.unlink(document)
print("Finished cleaning.")
print("Exiting with success status.")
raise SystemExit(0)