Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix deprecated method File.exists? to File.exist? #159

Merged
merged 2 commits into from
Aug 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lib/docsplit/image_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def convert(pdf, size, format, previous=nil)
directory = directory_for(size)
pages = @pages || '1-' + Docsplit.extract_length(pdf).to_s
escaped_pdf = ESCAPE[pdf]
FileUtils.mkdir_p(directory) unless File.exists?(directory)
FileUtils.mkdir_p(directory) unless File.exist?(directory)
common = "#{MEMORY_ARGS} -density #{@density} #{resize_arg(size)} #{quality_arg(format)}"
if previous
FileUtils.cp(Dir[directory_for(previous) + '/*'], directory)
Expand All @@ -48,7 +48,7 @@ def convert(pdf, size, format, previous=nil)
end
end
ensure
FileUtils.remove_entry_secure tempdir if File.exists?(tempdir)
FileUtils.remove_entry_secure tempdir if File.exist?(tempdir)
end


Expand Down
6 changes: 3 additions & 3 deletions lib/docsplit/page_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ def extract(pdfs, opts)
[pdfs].flatten.each do |pdf|
pdf_name = File.basename(pdf, File.extname(pdf))
page_path = ESCAPE[File.join(@output, "#{pdf_name}")] + "_%d.pdf"
FileUtils.mkdir_p @output unless File.exists?(@output)
FileUtils.mkdir_p @output unless File.exist?(@output)

cmd = if DEPENDENCIES[:pdftailor] # prefer pdftailor, but keep pdftk for backwards compatibility
"pdftailor unstitch --output #{page_path} #{ESCAPE[pdf]} 2>&1"
else
"pdftk #{ESCAPE[pdf]} burst output #{page_path} 2>&1"
end
result = `#{cmd}`.chomp
FileUtils.rm('doc_data.txt') if File.exists?('doc_data.txt')
FileUtils.rm('doc_data.txt') if File.exist?('doc_data.txt')
raise ExtractionFailed, result if $? != 0
result
end
Expand Down
30 changes: 15 additions & 15 deletions lib/docsplit/pdf_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def osx?
def linux?
  # True when HOST_OS contains "linux" (matched case-insensitively);
  # always returns a strict boolean rather than a MatchData/nil.
  HOST_OS.match(/linux/i) ? true : false
end

# The first line of the help output holds the name and version number
# of the office software to be used for extraction.
def version_string
Expand All @@ -35,10 +35,10 @@ def libre_office?
def open_office?
  # True when some line of version_string starts with the OpenOffice.org
  # banner; the result is coerced to a strict boolean.
  banner = version_string.match(/^OpenOffice.org/)
  !banner.nil?
end

# A set of default locations to search for office software
# These have been extracted from JODConverter. Each listed
# path should contain a directory "program" which in turn
# path should contain a directory "program" which in turn
# contains the "soffice" executable.
# see: https://github.com/mirkonasato/jodconverter/blob/master/jodconverter-core/src/main/java/org/artofsolving/jodconverter/office/OfficeUtils.java#L63-L91
def office_search_paths
Expand Down Expand Up @@ -69,7 +69,7 @@ def office_search_paths
end
search_paths
end

# Identify the path to a working office executable.
def office_executable
paths = office_search_paths
Expand All @@ -78,10 +78,10 @@ def office_executable
# raise an error if that path isn't valid, otherwise, add
# it to the front of our search paths.
if ENV['OFFICE_PATH']
raise ArgumentError, "No such file or directory #{ENV['OFFICE_PATH']}" unless File.exists? ENV['OFFICE_PATH']
raise ArgumentError, "No such file or directory #{ENV['OFFICE_PATH']}" unless File.exist? ENV['OFFICE_PATH']
paths.unshift(ENV['OFFICE_PATH'])
end

# The location of the office executable is OS dependent
path_pieces = ["soffice"]
if windows?
Expand All @@ -91,32 +91,32 @@ def office_executable
else
path_pieces += [["program", "soffice"]]
end

# Search for the first suitable office executable
# and short-circuit once an executable is found.
paths.each do |path|
if File.exists? path
if File.exist? path
@@executable ||= path unless File.directory? path
path_pieces.each do |pieces|
check_path = File.join(path, pieces)
@@executable ||= check_path if File.exists? check_path
@@executable ||= check_path if File.exist? check_path
end
end
break if @@executable
end
raise OfficeNotFound, "No office software found" unless @@executable
@@executable
end

# Used to specify the office location for JODConverter
def office_path
  # Walk two levels up from the office executable: first drop the
  # executable's file name, then drop the directory that contains it.
  executable_dir = File.dirname(office_executable)
  File.dirname(executable_dir)
end

# Convert documents to PDF.
def extract(docs, opts)
out = opts[:output] || '.'
FileUtils.mkdir_p out unless File.exists?(out)
FileUtils.mkdir_p out unless File.exist?(out)
[docs].flatten.each do |doc|
ext = File.extname(doc)
basename = File.basename(doc, ext)
Expand All @@ -128,7 +128,7 @@ def extract(docs, opts)
if libre_office?
# Set the LibreOffice user profile, so that parallel uses of cloudcrowd don't trip over each other.
ENV['SYSUSERCONFIG']="file://#{File.expand_path(escaped_out)}"

options = "--headless --invisible --norestore --nolockcheck --convert-to pdf --outdir #{escaped_out} #{escaped_doc}"
cmd = "#{office_executable} #{options} 2>&1"
result = `#{cmd}`.chomp
Expand All @@ -147,9 +147,9 @@ def extract(docs, opts)
LOGGING = "-Djava.util.logging.config.file=#{ESCAPED_ROOT}/vendor/logging.properties"

HEADLESS = "-Djava.awt.headless=true"

private

# Runs a Java command, with quieted logging, and the classpath set properly.
def run_jod(command, pdfs, opts, return_output=false)

Expand Down
4 changes: 2 additions & 2 deletions lib/docsplit/text_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def initialize
# Extract text from a list of PDFs.
def extract(pdfs, opts)
extract_options opts
FileUtils.mkdir_p @output unless File.exists?(@output)
FileUtils.mkdir_p @output unless File.exist?(@output)
[pdfs].flatten.each do |pdf|
@pdf_name = File.basename(pdf, File.extname(pdf))
pages = (@pages == 'all') ? 1..Docsplit.extract_length(pdf) : @pages
Expand Down Expand Up @@ -80,7 +80,7 @@ def extract_from_ocr(pdf, pages)
clean_text(base_path + '.txt') if @clean_ocr
end
ensure
FileUtils.remove_entry_secure tempdir if File.exists?(tempdir)
FileUtils.remove_entry_secure tempdir if File.exist?(tempdir)
end


Expand Down
2 changes: 1 addition & 1 deletion test/test_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class Minitest::Test
OUTPUT = 'test/output'

def clear_output
FileUtils.rm_r(OUTPUT) if File.exists?(OUTPUT)
FileUtils.rm_r(OUTPUT) if File.exist?(OUTPUT)
end

def teardown
Expand Down