");
if (end != nullptr){
ptdesc = end+6;
if (*ptdesc == '\n') ptdesc++;
}else{
ptdesc += strlen(ptdesc);
}
}else if (rssd_ncmp(ptdesc,CONST_STR("
");
if (end != nullptr){
ptdesc = end+1;
if (*ptdesc == '\n') ptdesc++;
}else{
ptdesc += strlen(ptdesc);
}
}else{
const char *pt = strcasestr(ptdesc,"src=");
if (pt != nullptr){
pt += 4;
if (*pt == '"') pt++;
const char *end;
if (is_start_any_ofnc(pt,end,"http://","https://")){
while (*end != '\0' && *end > ' ' && *end != '>' && *end != '"') end++;
ret += "_IMG=" + string(pt,end-pt) + " ";
if (*end == '"') end++;
while (*end != '\0' && *end != '>') end++;
if (*end == '>') end++;
ptdesc = end;
}
}
}
}else if (rssd_ncmp(ptdesc,CONST_STR("
");
if (end != nullptr){
ptdesc = end+4;
if (*ptdesc == '\n') ptdesc++;
}else{
ptdesc += strlen(ptdesc);
}
}else if (rssd_ncmp(ptdesc,CONST_STR("");
if (end != nullptr){
ptdesc = end+4;
if (*ptdesc == '\n') ptdesc++;
}else{
ptdesc += strlen(ptdesc);
}
}else if (options.remove_h && is_start_any_ofnc(ptdesc,pt
,"
","","",""
,"
","","","")){
ptdesc = pt;
}else if (rssd_ncmp(ptdesc,CONST_STR("");
if (end != nullptr){
ptdesc = end+5;
if (*ptdesc == '\n') ptdesc++;
}else{
ptdesc += strlen(ptdesc);
}
}else if (rssd_ncmp(ptdesc,CONST_STR("
const char *end = strstr(ptdesc,"");
if (end != nullptr){
ptdesc = end+9;
if (*ptdesc == '\n') ptdesc++;
}else{
ptdesc += strlen(ptdesc);
}
}else{
ret += *ptdesc++;
}
}
if (options.remove_comma){
// After using various remove_ options, we end up with a bunch of commas.
while (true){
auto pos = ret.find(",,");
if (pos == string::npos){
break;
}else{
unsigned start = pos;
while (ret[pos] == ',') pos++;
ret.replace(start,pos-start,"");
}
}
while (true){
auto pos = ret.find(" ");
if (pos == string::npos){
break;
}else{
ret.replace(pos,6,"");
}
}
while (true){
auto pos = ret.find("\t|");
if (pos == string::npos){
break;
}else{
ret.replace(pos,2,"");
}
}
}
// Remove spaces at the end and front
strip_end (ret);
for (unsigned i=0; i 0) ret.replace (0,i,"");
break;
}
}
return ret;
}
// Directory receiving a copy of each retrieved feed (command line option --savexml).
static const char *savexmldir = "/tmp/savexml";
// Directory holding, per account, the list of links already seen (option --savelink).
static const char *savelinkdir = "/var/lib/rssd/savelink";
// Scratch directory; rssd_process writes the last generated message to rss.txt in it.
static const char *tmp_dir = "/tmp";
// For testing: when set, all messages are sent to this account (option --force_account).
static const char *force_account = nullptr;
// Counters filled in by rssd_process() while walking a feed.
struct PROCESS_STATS{
	// Incremented each time a message is generated for a link not seen before.
	unsigned int newmsgs{0};
	// Incremented each time a feed entry points to an already known link.
	unsigned int keepmsgs{0};
};
/*
	Process the RSS feed of one account.

	Searches config.websites for the entry whose account equals name.ptr.
	For that entry, the feed is retrieved with rssd_getrss() and a message is
	built for every link that is not already recorded in the account's
	"<savelinkdir>/<name>.links" file. Known links only get their timestamp
	refreshed. The link list is then written back, dropping entries older
	than 30 days.

	con     : connection information, stored in glocal.con for the publish step.
	config  : configuration holding the list of websites.
	name    : account to process.
	msg     : receives an error text when the account is not in the configuration.
	verbose : when true, prints one line per link (new or already known).
	stats   : newmsgs/keepmsgs counters are incremented through glocal.stats.

	Returns -1 unless one of the paths below sets ret to 0 (feed retrieval
	failure, or normal completion). An account without a session or an
	unknown account leaves ret at -1.

	NOTE(review): several statements in this function look truncated: bare
	argument lists such as "(fname,true);" and "return 0;" lines that seem to
	end callback bodies. The call/callback headers are not visible in this
	view (possibly template markup lost in extraction) - confirm against the
	original file before changing anything here.
*/
static int rssd_process (CONNECT_INFO &con, RSSD_CONFIG &config, PARAM_STRING name, string &msg, bool verbose, PROCESS_STATS &stats)
{
glocal bool verbose = verbose;
glocal PROCESS_STATS *stats = &stats;
glocal CONNECT_INFO *con = &con;
int ret = -1;
bool found = false;
for (auto &r:config.websites){
if (r.account == name.ptr){
found = true;
glocal WEBSITE *r = &r;
glocal string rss;
// Without a session, nothing is done for this account (ret stays -1).
if (r.session.empty()){
tlmp_error (MSG_U(E_NOSESSSION,"Session not set for account %s\n"),name.ptr);
break;
}
if (rssd_getrss(r.rssurl,glocal.rss)==-1){
tlmp_error (MSG_U(E_GETRSS,"Error getting the RSS for account %s\n"),name.ptr);
ret = 0; // We have to continue for other accounts. This is a network
// error probably and this will fix itself next time.
}else{
// Optionally keep a copy of the retrieved feed in savexmldir/<name>.xml.
// NOTE(review): the statement opening the file (and defining fout) is truncated here.
if (savexmldir != NULL){
mkdir (savexmldir,0755);
(string_f("%s/%s.xml",savexmldir,name.ptr),false);
fprintf (fout,"%s",glocal.rss.c_str());
return 0;
}
// Load the links already processed for that site
mkdir (savelinkdir,0755);
// NOTE(review): the template arguments of this map are not visible in this view.
glocal map links;
glocal time_t now = time(NULL);
string fname = string_f("%s/%s.links",savelinkdir,name.ptr);
(fname,true);
// Not a problem: this file is created on the fly
// Older versions had the date and the link as the key, but it was not reliable.
// Now, we only keep the URL.
// Each line holds a numeric timestamp followed by the link.
unsigned timestamp = atoi(line);
line = str_skipdig(line);
line = str_skip(line);
if (line[0] != '\0') glocal.links[line] = timestamp;
return 0;
// NOTE(review): what follows uses link, title and description, which are not
// declared in the visible code; presumably they are supplied per feed item by
// a parser callback applied to glocal.rss - confirm.
(glocal.rss);
string fulllink = link;
strip_end (fulllink);
if (!is_start_any_ofnc(fulllink,NONEED,"http://","https://")){
// This is a relative URL, we use the siteurl to make it complete
fulllink = glocal.r->siteurl+fulllink;
}
auto mlink = glocal.links.find(fulllink);
if (mlink == glocal.links.end()){
// Unknown link: build the message text (title, link, formatted description).
string formatdesc = rssd_formatdesc(description,glocal.r->options);
if (glocal.verbose) printf (MSG_U(I_GENMSG,"Create a message for link %s\n"),fulllink.c_str());
glocal string content;
glocal.content = string_f("%s\n%s\n%s",title.c_str(),fulllink.c_str(),formatdesc.c_str());
glocal bool success = false;
vector empty;
BOB_TYPE content(glocal.content.c_str(),glocal.content.size(),false);
// force_account, when set, overrides the account receiving the message.
const char *groupowner = force_account != NULL ? force_account : glocal.r->account.c_str();
// NOTE(review): the name of the function called here is not visible; success and
// msg on the next lines are presumably parameters of its reply callback - confirm.
(*glocal.con,glocal.r->session,"",empty,"public",groupowner,content,false,"","","");
if (!success) tlmp_error (MSG_U(E_RSSSENDFAIL,"Can't publish the RSS message: %s\n"),msg);
glocal.success = success;
// The link is only remembered once the message was published.
if (glocal.success) glocal.links[fulllink] = glocal.now;
// The generated text is also written to <tmp_dir>/rss.txt.
string textfile = string_f("%s/rss.txt",tmp_dir);
(textfile,false);
fprintf (fout,"%s",glocal.content.c_str());
return 0;
glocal.stats->newmsgs++;
}else{
if (glocal.verbose) printf (MSG_U(I_ALREADY,"Old message ignored for link %s\n"),fulllink.c_str());
mlink->second = glocal.now; // Whatever is in the RSS is kept current.
glocal.stats->keepmsgs++;
}
// Save the links
(fname,false);
// We keep old links around for 30 days
time_t old = time(NULL)-(30*24*60*60);
for (auto const &s:glocal.links){
// NOTE(review): %u assumes s.second is an unsigned int; the value type of the
// map is not visible here - confirm the format matches.
if (s.second > old) fprintf (fout,"%u %s\n",s.second,s.first.c_str());
}
return 0;
ret = 0;
}
// Only the first website matching the account is processed.
break;
}
}
if (!found) msg = string_f(MSG_U(E_WEBSITENOTFOUND,"RSS account %s not found in configuration"),name.ptr);
return ret;
}
/*
	Entry point of rssd ("Convert RSS feeds into bolixo messages").

	Declares the defaults and command line options (setarg), then, depending
	on the flags:
	  - testmode / process : run part of the processing inline;
	  - printconf          : print every configured website;
	  - printaccounts      : print the accounts having an RSS url and a complete definition;
	  - printfield         : print one field for all accounts or for the selected one;
	  - server             : answer requests on the control socket;
	  - otherwise          : usage().

	NOTE(review): this function looks truncated in many places: bare argument
	lists such as "();" or "(glocal.rss);", several "int ret" declarations and
	"return" statements in a row, variables used without a visible declaration
	(msg, arg, line, description, info, websites, s, ...). The call/callback
	headers giving them meaning are not visible in this view (possibly template
	markup lost in extraction) - confirm against the original file.
*/
int main (int argc, char *argv[])
{
glocal int ret = -1;
glocal const char *configfile = "/etc/bolixo/rssd.conf.d";
// NOTE(review): "sssd-sessions" - possibly meant "rssd-sessions"; confirm.
glocal const char *sessionsfile = "/var/run/sssd-sessions";
glocal const char *control = "/var/run/rssd.sock";
glocal const char *bod_sock = "/dev/bod.sock";
glocal const char *user = "bolixo";
glocal bool daemon = false;
glocal bool testmode = false;
glocal bool test_summary = false;
glocal bool process = false;
glocal bool printconf = false;
glocal bool printaccounts = false;
glocal bool server = false;
glocal const char *mysecret = "";
glocal const char *pidfile = "/var/run/rssd.pid";
glocal const char *printfield = NULL;
glocal const char *account = NULL;
static const char *tbdic[]={"bolixo",NULL};
// NOTE(review): the name of the function called here is not visible.
glocal.ret = (argc,argv,tbdic);
setproginfo ("rssd",VERSION,"Convert RSS feeds into bolixo messages");
// Command line options, grouped by topic.
setarg (' ',"config",MSG_U(O_CONFIGPATH,"Configuration file path"),glocal.configfile,false);
setarg (' ',"sessions",MSG_U(O_SESSIONS,"Session file"),glocal.sessionsfile,false);
setgrouparg ("Networking");
setarg ('c',"control","Unix socket for rssd-control",glocal.control,false);
setarg (' ',"bod_sock","Unix socket to reach the bod server",glocal.bod_sock,false);
setarg (' ',"mysecret","Secret used to talk with bod",glocal.mysecret,false);
setgrouparg ("Directories");
setarg (' ',"savexml","Save retrieved XML file (for review/debug)",savexmldir,false);
setarg (' ',"savelink","Save known links about a site",savelinkdir,false);
setgrouparg ("Misc.");
setarg (' ',"server","Run as a server",glocal.server,false);
setarg (' ',"user","Run the program as this user",glocal.user,false);
setarg (' ',"daemon","Run in background",glocal.daemon,false);
setarg (' ',"pidfile","File holding the PID of the process",glocal.pidfile,false);
setgrouparg ("Scripting");
setarg (' ',"printaccounts","Print all accounts id",glocal.printaccounts,false);
setarg (' ',"printfield","Print one information about an account",glocal.printfield,false);
setarg (' ',"account","Account select for printfield",glocal.account,false);
setgrouparg ("Tests");
setarg (' ',"testmode","Execution inline of some part of the rssd server",glocal.testmode,false);
setarg (' ',"test_summary","Do not format rss, just print the fields",glocal.test_summary,false);
setarg (' ',"process","Process some websites",glocal.process,false);
setarg (' ',"printconf","Print configuration",glocal.printconf,false);
setarg (' ',"force_account","Send all messages to this account",force_account,false);
// Message reporting: syslog when running as a daemon, stderr otherwise.
// NOTE(review): presumably the bodies of an error and a warning reporting
// callback receiving msg; their headers are not visible - confirm.
if (glocal.daemon){
syslog (LOG_ERR,"%s",msg);
}else{
fprintf (stderr,"%s",msg);
}
if (glocal.daemon){
syslog (LOG_WARNING,"%s",msg);
}else{
fprintf (stderr,"%s",msg);
}
// Test mode.
int ret = -1;
if (!glocal.testmode && !glocal.process){
usage();
}else if (glocal.process){
CONNECT_INFO con; // Connection to bod
con.port = glocal.bod_sock;
con.secret = glocal.mysecret;
RSSD_CONFIG config;
config.read(glocal.configfile);
config.readsessions (glocal.sessionsfile);
unsigned nbproc=0;
PROCESS_STATS stats;
ret = 0;
// NOTE(review): the next line is truncated (a loop header merged with the end of
// a call); the rest of the process branch and the start of the test mode branch
// are not visible.
for (int i=0; i(arg,false);
glocal.rss += line;
return 0;
(glocal.rss);
// Print the raw fields of one feed item; only the first line of the description is shown.
printf ("------------------------------------------------------------------------\n");
const char *ptdesc = description.c_str();
const char *pt = strchr(ptdesc,'\n');
string oneline;
if (pt == NULL){
oneline = description;
}else{
oneline = string(ptdesc,pt-ptdesc);
}
printf ("title=%s\nlink=%s\npubdate=%s\nguid=%s\ndescription=%s\ncontent=%s\n"
,title.c_str(),link.c_str(),pubdate.c_str(),guid.c_str(),oneline.c_str(),content.c_str());
if (!glocal.test_summary){
// Unless --test_summary is set, also print the message as it would be formatted.
printf ("===================\n");
WEBSITE_OPTIONS options;
options.remove_a = true;
options.remove_p = true;
// NOTE(review): the formatter earlier in this file tests options.remove_h;
// confirm remove_h6 is a distinct, existing field.
options.remove_h6 = true;
options.remove_comma = true;
string formatdesc = rssd_formatdesc (description,options);
printf ("%s\n%s\n%s",title.c_str(),link.c_str(),formatdesc.c_str());
}
}
}
}
return ret;
// Normal execution: select the action from the command line flags.
int ret = -1;
if (glocal.printconf){
// Dump every configured website.
RSSD_CONFIG config;
config.read(glocal.configfile);
for (auto const &w:config.websites){
printf ("account:%s\n",w.account.c_str());
printf ("\tsiteurl:%s\n",w.siteurl.c_str());
printf ("\trssurl:%s\n",w.rssurl.c_str());
printf ("\tlang:%s\n",w.lang.c_str());
printf ("\tname:%s\n",w.name.c_str());
printf ("\tdeptname:%s\n",w.deptname.c_str());
printf ("\tcountry:%s\n",w.country.c_str());
printf ("\tstate:%s\n",w.state.c_str());
printf ("\tcity:%s\n",w.city.c_str());
printf ("\tphotourl:%s\n",w.photo_url.c_str());
printf ("\tmini-photourl:%s\n",w.mini_photo_url.c_str());
printf ("\toptions:"); w.printfield("options");
}
ret = 0;
}else if (glocal.printaccounts){
// List the accounts having an RSS url; accounts missing the site url, a photo
// url or the country are reported as incomplete instead of being printed.
RSSD_CONFIG config;
config.read(glocal.configfile);
for (auto const &w:config.websites){
if (w.rssurl.size() > 0){
if (w.siteurl.empty()
|| w.mini_photo_url.empty()
|| w.photo_url.empty()
|| w.country.empty()){
tlmp_error (MSG_U(E_INCOMPLETEACC,"Incomplete RSS account %s\n"),w.account.c_str());
}else{
printf ("%s\n",w.account.c_str());
}
}
}
ret = 0;
}else if (glocal.printfield != NULL){
// Print one field, either for every account or only for the one given with --account.
// Note that ret is not set to 0 in this branch.
RSSD_CONFIG config;
config.read(glocal.configfile);
if (glocal.account == NULL){
for (auto const &w:config.websites){
w.printfield(glocal.printfield);
}
}else{
const auto &w = find_if(config.websites.begin(),config.websites.end(),[this](const auto &w){
return w.account == glocal.account;
});
if (w != config.websites.end()){
w->printfield(glocal.printfield);
}
}
}else if (glocal.server){
// Server mode.
glocal RSSD_CONFIG config;
glocal CONNECT_INFO con; // Connection to bod
glocal string controlport = string_f("unix:%s",glocal.control);
glocal.con.port = glocal.bod_sock;
glocal.con.secret = glocal.mysecret;
// NOTE(review): from here on, the statements look like the bodies of several
// server and command callbacks laid out one after the other; the headers
// naming them are not visible - confirm.
();
// A HANDLE_INFO is allocated and attached to info.data; it is tagged TYPE_CONTROL
// when info.port is the control socket.
HANDLE_INFO *n = new HANDLE_INFO;
info.data = n;
// tlmp_error ("port=%s control=%s client=%s\n",info.port,glocal.controlport.c_str(),glocal.clientport.c_str());
if (string_cmp(info.port,glocal.controlport)==0){
n->type = TYPE_CONTROL;
}
// Handling of one received line.
debug_printf (D_PROTO,"receive line: %s\n",line);
HANDLE_INFO *c = (HANDLE_INFO*)info.data;
static const char *tbtype[]={"none","control request","client request", "worker request"};
ERROR_PREFIX prefix ("%s: ",tbtype[c->type]);
if (c->type == TYPE_CONTROL){
(this,c->req,line, info.linelen,endserver, endclient, no,c);
// Replies with the program version.
vector tb;
tb.push_back(string_f ("Version %s",VERSION));
rep_status(tb);
// Requests the end of the server.
endserver = true;
// Debug output on/off and debug file selection.
if (on){
debug_seton();
}else{
debug_setoff();
}
debug_setfdebug (filename);
// connectto port send = lines:v
glocal const char *send = send;
glocal vector lines;
// We want to test publishd connectivity to the outside
(connectto,port,5);
sendf ("%s\n",glocal.send);
glocal.lines.push_back(line);
end = true;
glocal.lines.emplace_back(string_f("fail: %s\n",strerror(errno)));
rep_help_connect (glocal.lines);
// Process either all configured websites or the ones listed in websites,
// stopping at the first rssd_process() returning -1.
bool success = true;
string msg;
glocal.config.read (glocal.configfile);
glocal.config.readsessions (glocal.sessionsfile);
PROCESS_STATS stats;
if (websites.size() == 0){
for (auto const &r:glocal.config.websites){
if (rssd_process(glocal.con,glocal.config,r.account,msg,false,stats)==-1){
success = false;
break;
}
}
}else{
for (auto r:websites){
if (rssd_process(glocal.con,glocal.config,r,msg,false,stats)==-1){
success = false;
break;
}
}
}
rep_process (success,msg);
glocal bool bod = false;
rep_test (glocal.bod);
tlmp_error ("Invalid command: %s\n",line);
endclient = true;
}
// Server start-up: set the control socket, optionally become a daemon, then loop.
bool some_errors = false;
if (fdpass_setcontrol(s,glocal.control,glocal.user)==-1){
some_errors = true;
}
if (!some_errors && s.is_ok()){
s.setrawmode(true);
if (glocal.daemon){
daemon_init(glocal.pidfile,glocal.user);
}
s.loop();
ret = 0;
}
}else{
usage();
}
return ret;
return glocal.ret;
}